def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    if network['name'] == 'resnet20': 
        lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
    elif network['name'] == 'resnet110': 
        lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
    else: 
        raise RuntimeError("Unknown model name!")

    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    
    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight = l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
    
    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
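
# Illustrative sketch, not part of the example above: the momentum time constant is the
# per-minibatch momentum 0.9 re-expressed on a per-sample basis, i.e. the number of samples
# over which a gradient contribution decays by a factor of 1/e. The minibatch size is assumed.
import numpy as np

minibatch_size = 128
momentum_per_mb = 0.9
time_constant = -minibatch_size / np.log(momentum_per_mb)
print(time_constant)                              # ~1214.9 samples
print(np.exp(-minibatch_size / time_constant))    # recovers 0.9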
Example #2
def train_lm(testing=False):
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used as the training criterion
    # error: a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z) ; print()
    
    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                            gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                            gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence : features, label_sequence : labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end =  timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples

            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    learner = momentum_sgd(model.parameters, 
                           lr       = learning_rate_schedule([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], unit=UnitType.sample, epoch_size=epoch_size),
                           momentum = momentum_as_time_constant_schedule([0]*20+[600]*20+[1200], epoch_size=epoch_size),
                           l2_regularization_weight = 0.002)
    
    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count)) # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features], criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples                     # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric # return values from last epoch
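
# Illustrative sketch only: with epoch_size given, each entry in the rate lists above is held
# for one epoch's worth of samples, and the last entry is held from then on. piecewise_value
# is a hypothetical stand-in for that lookup, not CNTK's implementation.
def piecewise_value(schedule, epoch_size, num_samples_seen):
    # clamp the epoch index to the last schedule entry
    idx = min(num_samples_seen // epoch_size, len(schedule) - 1)
    return schedule[idx]

lrs = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]
print(piecewise_value(lrs, 50000, 0))              # epoch 1  -> 0.0015625
print(piecewise_value(lrs, 50000, 25 * 50000))     # epoch 26 -> 0.00046875
print(piecewise_value(lrs, 50000, 99 * 50000))     # beyond the list -> 0.00015625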
def create_trainer(network, epoch_size, num_quantization_bits, block_size,
                   warm_up):
    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    local_learner = momentum_sgd(
        network['output'].parameters,
        lr_per_minibatch,
        momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner)
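
# Rough intuition, not CNTK internals: the 0.5 above is a UnitType.minibatch rate, applied to
# the aggregated minibatch gradient as-is, whereas UnitType.sample rates are weighted by the
# number of samples in the minibatch. That is why other examples in this listing divide a
# per-minibatch rate by minibatch_size to obtain lr_per_sample.
def effective_step_scale(lr, unit, samples_in_minibatch):
    # 'minibatch': the rate is used once per update; 'sample': per-sample rate times count
    return lr if unit == "minibatch" else lr * samples_in_minibatch

print(effective_step_scale(0.5, "minibatch", 64))    # 0.5
print(effective_step_scale(0.5 / 64, "sample", 64))  # 0.5 again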
def init_trainer(config, text_lines, slot_value_lines):

    hidden_dim = config.hidden_dim

    segment_begin = config.segment_begin
    segment_end = config.segment_end

    data = DataReader(text_lines, slot_value_lines, segment_begin, segment_end)

    # Create model nodes for the source and target inputs
    vocab_dim = data.vocab_dim
    sv_dim = data.sv_dim

    input_sequence, sv_pair, label_sequence, inputH, inputC = create_inputs(hidden_dim, sv_dim, vocab_dim)
    model = create_model(hidden_dim, sv_dim, vocab_dim)
    z = model(input_sequence, inputH, inputC, sv_pair)
    # cross_entropy: this is used as the training criterion
    ce, err = cross_entropy_with_full_softmax(z, label_sequence, sv_dim, vocab_dim)

    learning_rate = config.learning_rate
    momentum_as_time_constant = config.momentum_as_time_constant
    clipping_threshold_per_sample = config.clipping_threshold_per_sample
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    gradient_clipping_with_truncation = True
    momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (ce, err), learner)
    inputs = [input_sequence, sv_pair, label_sequence, inputH, inputC]

    return data, z, trainer, inputs
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters), 
                                 lr = learning_rate_schedule(1, UnitType.minibatch),
                                 momentum = momentum_as_time_constant_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
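
# Illustrative usage of the Evaluator helper above; `criterion`, `features` and `labels` are
# assumed names, not defined in this listing. The dummy learner exists only because Trainer
# requires one -- test_minibatch evaluates without ever applying it.
#
#   evaluator = Evaluator(criterion)
#   metric = evaluator.test_minibatch({criterion.arguments[0]: features,
#                                      criterion.arguments[1]: labels})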
Example #8
def train_lm(training_file, epochs, max_num_minibatches):

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)
    
    # and apply it to the input sequence    
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data            
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence : features, label_sequence : labels}, mask)
            mask = [False] 
            trainer.train_minibatch(arguments)

            global_minibatch = e*minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e+1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
Example #9
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input((num_channels, image_height, image_width))
    roi_input   = input((num_rois, 4))
    label_input = input((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def train_model(debug_output=False):
    # Create the minibatch source
    minibatch_source = create_reader(map_file)

    # Input variables denoting features and label data
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable((num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the prediction model and loss function
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    learner = momentum_sgd(model.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training image classifier for %s epochs." % max_epochs)
    log_number_of_parameters(model)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(
                mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        model.save(
            os.path.join(output_model_folder,
                         'withcrops_{}.dnn'.format(epoch + 1)))

    return
def train_fast_rcnn(debug_output=False, model_path=model_file):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input   = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr = learning_rate_schedule(1, UnitType.minibatch),
                                 momentum = momentum_as_time_constant_schedule(0))
    return Trainer(model, (loss, metric), dummy_learner)
Example #13
def train_and_test(s2smodel, train_reader, test_reader, block_size,
                   num_quantization_bits, max_epochs, epoch_size,
                   minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(
                                               model_path,
                                               "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(source=test_reader,
                                        mb_size=minibatch_size)).train()
Example #14
def train(reader, model, max_epochs):

    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.logging.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters  --needed here because learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    learner = fsadagrad(criterion.parameters,
                        lr         = learning_rate_schedule([0.003]*2+[0.0015]*12+[0.0003], UnitType.sample, epoch_size),
                        momentum   = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
                        gradient_clipping_threshold_per_sample = 15,
                        gradient_clipping_with_truncation = True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training') # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        peek(model, epoch)                  # log some interesting info
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:                # loop over minibatches on the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t))     # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({criterion.arguments[0]: data[reader.streams.query], criterion.arguments[1]: data[labels]})  # update model with it
            t += data[labels].num_samples                                      # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)    # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric # return values from last epoch
def train_model(reader, reader_test, model, epoch_size=50000, max_epochs=80):

    # declare the model's input dimension
    # Training does not require this, but it is needed for deployment.
    model.update_signature((num_channels, image_height, image_width))

    # criterion function. This is what is being trained.
    # Model gets "sandwiched" between normalization (not part of model proper) and criterion.
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    #debughelpers.dump_function(criterion, 'criterion')

    #from cntk.logging.graph import plot
    #plot(criterion, filename=os.path.join(model_path, "ConvNet_CIFAR10_DataAug.pdf"))

    # iteration parameters
    minibatch_size = 64
    #epoch_size = 1000 ; max_epochs = 1 # for faster testing

    # learning parameters
    learner = momentum_sgd(model.parameters, 
                           lr       = learning_rate_schedule([0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625], unit=UnitType.sample, epoch_size=epoch_size),
                           momentum = momentum_as_time_constant_schedule([0]*20+[600]*20+[1200], epoch_size=epoch_size),
                           l2_regularization_weight = 0.002)
    
    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count)) # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features], criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples                     # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric # return values from last epoch
def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantization_bits, max_epochs, epoch_size, minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    local_learner = fsadagrad(model_train.parameters,
                        lr       = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
                        momentum = momentum_as_time_constant_schedule(1100),
                        gradient_clipping_threshold_per_sample=2.3,
                        gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {criterion.arguments[0]: train_reader.streams.features,
                  criterion.arguments[1]: train_reader.streams.labels}

    training_session(
        mb_source = train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency = epoch_size,
                                           filename = os.path.join(model_path, "SequenceToSequence"),
                                           restore = False),
        cv_config=CrossValidationConfig(source=test_reader, mb_size=minibatch_size)
    ).train()
Example #17
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs,
          epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # This does not need to be done in training generally though
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = adam_sgd(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]:
                mb_train[train_reader.streams.features],
                criterion.arguments[1]:
                mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(
                    format_sequences(
                        sparse_to_dense(
                            mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy,
                                    mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
model = Sequential([
    For(
        range(2), lambda: Sequential(
            [Stabilizer(),
             Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])

z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(
    z.parameters,
    lr_per_sample,
    momentum_time_constant,
    gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
    gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)


def sample(net, prime_text='', use_hardmax=True, length=100, temperature=1.0):
def train_and_evaluate(reader,
                       reader_test,
                       model,
                       epoch_size=50000,
                       max_epochs=5):

    # declare the model's input dimension
    # Training does not require this, but it is needed for deployment.
    model.update_signature((num_channels, image_height, image_width))

    # criterion function. This is what is being trained.
    # Model gets "sandwiched" between normalization (not part of model proper) and criterion.
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    #debughelpers.dump_function(criterion, 'criterion')

    #from cntk.logging.graph import plot
    #plot(criterion, filename=os.path.join(model_path, "ConvNet_CIFAR10_DataAug.pdf"))

    # iteration parameters
    minibatch_size = 64
    #epoch_size = 1000 ; max_epochs = 1 # for faster testing

    # learning parameters
    learner = momentum_sgd(
        model.parameters,
        lr=learning_rate_schedule([0.0015625] * 20 + [0.00046875] * 20 +
                                  [0.00015625] * 20 + [0.000046875] * 10 +
                                  [0.000015625],
                                  unit=UnitType.sample,
                                  epoch_size=epoch_size),
        momentum=momentum_as_time_constant_schedule([0] * 20 + [600] * 20 +
                                                    [1200],
                                                    epoch_size=epoch_size),
        l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(
                min(minibatch_size,
                    epoch_size - sample_count))  # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]:
                mb[reader.streams.features],
                criterion.arguments[1]:
                mb[reader.streams.labels]
            })
            sample_count += mb[
                reader.streams.
                labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)
        model.save(
            os.path.join(model_path,
                         "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # TODO: we should be done here
    #return metric_numer/metric_denom

    ### Evaluation action

    # evaluate with current Trainer instance; just to make sure we save and load the model correctly and BN works now --TODO: delete once confirmed
    epoch_size = 10000
    minibatch_size = 16
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        mbsize = min(minibatch_size, epoch_size - sample_count)
        mb = reader_test.next_minibatch(mbsize)
        metric_numer += mbsize * trainer.test_minibatch(
            {
                criterion.arguments[0]: mb[reader_test.streams.features],
                criterion.arguments[1]: mb[reader_test.streams.labels]
            })
        metric_denom += mbsize
        sample_count += mb[reader_test.streams.labels].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    # return evaluation error.
    return loss, metric  # return values from last epoch
Example #20
def train(reader, model, max_epochs):

    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.logging.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters  --needed here because learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    learner = fsadagrad(
        criterion.parameters,
        lr=learning_rate_schedule([0.003] * 2 + [0.0015] * 12 + [0.0003],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(minibatch_size /
                                                    -math.log(0.9)),
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(freq=100, first=10,
                                       tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):  # loop over epochs
        peek(model, epoch)  # log some interesting info
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size,
                                             epoch_end - t))  # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({
                criterion.arguments[0]:
                data[reader.streams.query],
                criterion.arguments[1]:
                data[labels]
            })  # update model with it
            t += data[labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)

    return loss, metric  # return values from last epoch
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # This does not need to be done in training generally though
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    learner = fsadagrad(model_train.parameters,
                        lr       = learning_rate_schedule([lr]*2+[lr/2]*3+[lr/4], UnitType.sample, epoch_size),
                        momentum = momentum_as_time_constant_schedule(1100),
                        gradient_clipping_threshold_per_sample=2.3,
                        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train) ; print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features], criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
Example #22
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = input(
        (num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = input(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with default_options(activation=relu):
        z = Sequential([
            Convolution3D((3, 3, 3), 64, pad=True),
            MaxPooling((1, 2, 2), (1, 2, 2)),
            For(
                range(3), lambda i: [
                    Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                    Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                    MaxPooling((2, 2, 2), (2, 2, 2))
                ]),
            For(range(2), lambda: [Dense(1024), Dropout(0.5)]),
            Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 1322  # for now we manually specify epoch size
    minibatch_size = 4

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = learning_rate_schedule(lr_per_sample,
                                         epoch_size=epoch_size,
                                         unit=UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant],
                                                     epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(
                minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = 332
    minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(
            minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({
            input_var: videos,
            label_var: labels
        }) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
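
# Illustrative arithmetic only (values below are made up): the returned figure is the
# sample-weighted average of the per-minibatch error rates from test_minibatch.
per_mb = [(0.50, 2), (0.00, 2), (0.25, 2)]    # (error rate, minibatch size)
numer = sum(err * size for err, size in per_mb)
denom = sum(size for _, size in per_mb)
print(numer / denom)                          # 0.25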
Example #23
def train_model(reader, reader_test, model, epoch_size=50000, max_epochs=80):

    # declare the model's input dimension
    # Training does not require this, but it is needed for deployment.
    model.update_signature((num_channels, image_height, image_width))

    # criterion function. This is what is being trained.
    # Model gets "sandwiched" between normalization (not part of model proper) and criterion.
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    #debughelpers.dump_function(criterion, 'criterion')

    #from cntk.logging.graph import plot
    #plot(criterion, filename=os.path.join(model_path, "ConvNet_CIFAR10_DataAug.pdf"))

    # iteration parameters
    minibatch_size = 64
    #epoch_size = 1000 ; max_epochs = 1 # for faster testing

    # learning parameters
    learner = momentum_sgd(
        model.parameters,
        lr=learning_rate_schedule([0.0015625] * 20 + [0.00046875] * 20 +
                                  [0.00015625] * 20 + [0.000046875] * 10 +
                                  [0.000015625],
                                  unit=UnitType.sample,
                                  epoch_size=epoch_size),
        momentum=momentum_as_time_constant_schedule([0] * 20 + [600] * 20 +
                                                    [1200],
                                                    epoch_size=epoch_size),
        l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(
                min(minibatch_size,
                    epoch_size - sample_count))  # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]:
                mb[reader.streams.features],
                criterion.arguments[1]:
                mb[reader.streams.labels]
            })
            sample_count += mb[
                reader.streams.
                labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)
        model.save(
            os.path.join(model_path,
                         "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric  # return values from last epoch
def init_train_fast_rcnn(image_height,
                         image_width,
                         num_classes,
                         num_rois,
                         mb_size,
                         max_epochs,
                         cntk_lr_per_image,
                         l2_reg_weight,
                         momentum_time_constant,
                         base_path,
                         boSkipTraining=False,
                         debug_output=False,
                         tensorboardLogDir=None):

    #make sure we use GPU for training
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(
        image_input, roi_input, num_classes, base_path)

    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and define mapping from reader streams to network inputs
    minibatch_source, epoch_size = create_mb_source("train",
                                                    image_height,
                                                    image_width,
                                                    num_classes,
                                                    num_rois,
                                                    base_path,
                                                    randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # set loss / error functions
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # set the progress printer(s)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if tensorboardLogDir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object
    lr_per_sample = [f / float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0

        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(
                mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            if sample_count % 100 == 1:
                print(
                    "Training in progress: epoch {} of {}, sample count {} of {}"
                    .format(epoch, max_epochs, sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # Log summary statistics (mean, std, abs-sum) of each parameter tensor, so we can confirm the parameters do change during training.
        if tensorboardLogDir is not None:
            for parameter in frcn_output.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               np.mean(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/std",
                                               np.std(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/absSum",
                                               np.sum(np.abs(parameter.value)),
                                               epoch)

        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))
    return frcn_output
Exemple #25
0
def train_and_evaluate(reader_train,
                       reader_test,
                       network_name,
                       epoch_size,
                       max_epochs,
                       profiler_dir=None,
                       model_dir=None,
                       log_dir=None,
                       tensorboard_logdir=None,
                       gen_heartbeat=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width),
                                 name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size / np.log(0.9)
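    # -minibatch_size / ln(0.9) expresses a per-minibatch momentum of 0.9 as
    # CNTK's per-sample time constant: the number of samples after which a
    # gradient's contribution has decayed by a factor of 1/e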
    l2_reg_weight = 0.0001

    # Set learning parameters
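    # each entry in lr_per_mb applies for one epoch (epoch_size samples) and the
    # last entry stays in effect for all remaining epochs; dividing by
    # minibatch_size converts the per-minibatch rates into per-sample form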
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample,
                                         epoch_size=epoch_size,
                                         unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [
        ProgressPrinter(tag='Training',
                        log_to_file=log_dir,
                        num_epochs=max_epochs,
                        gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

        # Log the mean of each parameter tensor, so that we can confirm that the parameters do change during training.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(),
                                               epoch)

        if model_dir:
            z.save(
                os.path.join(model_dir,
                             network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # evaluate the model on this minibatch and accumulate the error metric
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
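# A minimal usage sketch for train_and_evaluate. The create_reader helper, the
# map/mean file names and the epoch settings below are illustrative assumptions
# (a reader-creation helper of this kind is usually defined elsewhere in the
# script); they are not taken from the original code.
if __name__ == '__main__':
    reader_train = create_reader('train_map.txt', 'CIFAR-10_mean.xml', is_training=True)
    reader_test = create_reader('test_map.txt', 'CIFAR-10_mean.xml', is_training=False)
    test_error = train_and_evaluate(reader_train, reader_test,
                                    network_name='resnet20',
                                    epoch_size=50000, max_epochs=160,
                                    tensorboard_logdir='tensorboard_logs')
    print("Final test error: {:.4f}".format(test_error))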
Exemple #27
0
def train_lm():
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes:
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: the training criterion
    # error: a binary indicator of whether the model predicted the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence,
                                           data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)
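    # full_ce is reported with the full softmax over the vocabulary; the training
    # criterion returned by create_model may use a cheaper approximation (e.g. a
    # sampled softmax), so the two are kept separate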

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum_schedule = momentum_as_time_constant_schedule(
        momentum_as_time_constant)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters,
        lr_schedule,
        momentum_schedule,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(
                train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence: features, label_sequence: labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples

            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence,
                                              label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples,
                               t_start)
                num_trained_samples_since_last_report = 0

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        # after each epoch save the model
        model_filename = "models/lm_epoch%d.dnn" % epoch_count
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
Exemple #28
0
def train_lm(training_file, epochs, max_num_minibatches):

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(
        training_file)

    # Create the source and target input sequences for the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # set up the criteria (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
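    # with truncation enabled, each gradient element is clipped to
    # [-clipping_threshold_per_sample, clipping_threshold_per_sample]
    # rather than rescaling the whole gradient by its norm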
    learner = momentum_sgd(
        z.parameters,
        lr_per_sample,
        momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size,
                                max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" %
          (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Map the model's input variables to the actual minibatch data to train with.
        # At the start of the data we mark the minibatch as the beginning of a new sequence (True).
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(b, minibatch_size, data, char_to_ix,
                                        vocab_dim)
            arguments = ({
                input_sequence: features,
                label_sequence: labels
            }, mask)
            mask = [False]
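            # subsequent minibatches continue the same character stream, so the
            # recurrent state carries over instead of being reset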
            trainer.train_minibatch(arguments)

            global_minibatch = e * minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1)
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)