Example #1
def train_lm(training_file, epochs, max_num_minibatches):

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)
    
    # and apply it to the input sequence    
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_parameter_schedule_per_sample(0.001)
    momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print ("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data            
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence : features, label_sequence : labels}, mask)
            mask = [False] 
            trainer.train_minibatch(arguments)

            global_minibatch = e*minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e+1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
Example #2
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    learner = momentum_sgd(
        model.parameters,
        lr=learning_parameter_schedule_per_sample(
            [0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20 +
            [0.000046875] * 10 + [0.000015625],
            epoch_size=epoch_size),
        momentum=momentum_schedule_per_sample(
            [0] * 20 + [0.9983347214509387] * 20 + [0.9991670137924583],
            epoch_size=epoch_size),
        l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count))  # fetch minibatch
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]: mb[reader.streams.features],
                criterion.arguments[1]: mb[reader.streams.labels]
            })
            sample_count += mb[reader.streams.labels].num_samples            # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return loss and evaluation error from the last epoch
    return loss, metric
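Here the Trainer receives None for the model, and the minibatch is bound positionally through criterion.arguments. A minimal sketch of how such a criterion Function can be built (the helper name and the CIFAR-10 shapes are assumptions):

import cntk as C
from cntk.layers.typing import Tensor

def create_criterion(model, num_classes=10):
    @C.Function
    def criterion(x: Tensor[(3, 32, 32)], y: Tensor[num_classes]):
        z = model(x)
        # loss and metric become the two outputs of the criterion Function
        return (C.cross_entropy_with_softmax(z, y), C.classification_error(z, y))
    return criterion

criterion.arguments[0] is then the feature input and criterion.arguments[1] the label input, matching the dictionary built in the training loop above.

Example #3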
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(features,
                                                     num_output_classes,
                                                     embedding_dim, hidden_dim,
                                                     cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
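A sketch of the create_reader helper the example assumes, reading the CTF file with sparse features and dense labels; the stream field names 'x' and 'y' are assumptions:

from cntk.io import (MinibatchSource, CTFDeserializer, StreamDef, StreamDefs,
                     INFINITELY_REPEAT)

def create_reader(path, is_training, input_dim, label_dim):
    return MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='x', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='y', shape=label_dim, is_sparse=False))),
        randomize=is_training,
        max_sweeps=INFINITELY_REPEAT if is_training else 1)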
Example #4
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
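Example #5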
def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantization_bits, max_epochs, epoch_size, minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    local_learner = fsadagrad(model_train.parameters,
                        lr       = learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size=epoch_size),
                        momentum = momentum_schedule_per_sample(0.9990913221888589),
                        gradient_clipping_threshold_per_sample=2.3,
                        gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {criterion.arguments[0]: train_reader.streams.features,
                  criterion.arguments[1]: train_reader.streams.labels}

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(model_path, "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(test_reader, minibatch_size=minibatch_size)
    ).train()
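The function above refers to module-level settings that are not shown. A sketch of plausible defaults; the concrete values are assumptions:

default_quantization_bits = 32   # 32 bits means no gradient quantization
use_attention = True             # selects the lower of the two learning rates
warm_up = 0                      # samples to process before going distributed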
Example #6
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
                       model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far

        trainer.summarize_training_progress()

        # Log the mean of each parameter tensor, to confirm that the parameters are indeed being updated.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler() # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Evaluate the minibatch and accumulate the sample-weighted metric.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
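The readers passed into this function are created elsewhere. A sketch of a CIFAR-10 reader with the kind of data augmentation this training run implies, using CNTK's ImageDeserializer; the map_file and mean_file arguments are assumptions:

import cntk.io as io
import cntk.io.transforms as xforms

def create_image_reader(map_file, mean_file, train):
    transforms = []
    if train:
        # random cropping only during training
        transforms += [xforms.crop(crop_type='randomside', side_ratio=0.8)]
    transforms += [xforms.scale(width=32, height=32, channels=3, interpolations='linear'),
                   xforms.mean(mean_file)]
    return io.MinibatchSource(io.ImageDeserializer(map_file, io.StreamDefs(
        features=io.StreamDef(field='image', transforms=transforms),
        labels=io.StreamDef(field='label', shape=10))))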
Example #7
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs,
          epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = fsadagrad(
        model_train.parameters,
        lr=learning_parameter_schedule_per_sample([lr] * 2 + [lr / 2] * 3 +
                                                  [lr / 4],
                                                  epoch_size=epoch_size),
        momentum=momentum_schedule_per_sample(0.9990913221888589),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({
                criterion.arguments[0]:
                mb_train[train_reader.streams.features],
                criterion.arguments[1]:
                mb_train[train_reader.streams.labels]
            })

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(
                    format_sequences(
                        sparse_to_dense(
                            mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy,
                                    mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
Example #8
def train_fast_rcnn(debug_output=False, model_path=model_file):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input   = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

    # Instantiate the trainer object as default
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    # Wrap the learner for distributed training (this also works with a single worker)
    learner = distributed.data_parallel_distributed_learner(
        learner = learner,
        num_quantization_bits = num_quantization_bits,   # default is non-quantized gradient aggregation
        distributed_after = warm_up)                     # samples after which training goes distributed (0 = no warm start)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs, rank=distributed.Communicator.rank())
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size * C.Communicator.num_workers(), epoch_size-sample_count), 
                input_map=input_map, 
                num_data_partitions=C.Communicator.num_workers(), 
                partition_index=C.Communicator.rank())     
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    if distributed_flg:
        distributed.Communicator.finalize()

    return frcn_output
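This function assumes it runs under MPI: each worker reads a disjoint partition of every minibatch, and the distributed learner aggregates the gradients. A sketch of the surrounding context; the script name is an assumption:

import cntk as C

workers = C.Communicator.num_workers()   # number of MPI processes
rank = C.Communicator.rank()             # this worker's partition index
# launched e.g. as: mpiexec -n 4 python train_fast_rcnn.py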
Example #9
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
                       model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False, fp16=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        # create model, and configure learning parameters
        if network_name == 'resnet20':
            z = create_cifar10_model(graph_input, 3, num_classes)
            lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
        elif network_name == 'resnet110':
            z = create_cifar10_model(graph_input, 18, num_classes)
            lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        pe = classification_error(z, graph_label)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        pe = C.cast(pe, dtype=np.float32)

    # shared training parameters
    minibatch_size = 128
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far

        trainer.summarize_training_progress()

        # Log the mean of each parameter tensor, to confirm that the parameters are indeed being updated.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler() # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch the next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Evaluate the minibatch and accumulate the sample-weighted metric.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
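The fp16 pattern above, in isolation: build the network body in float16, but cast the inputs in and the criterion out, so that readers and learners keep operating in float32. A toy sketch with a Dense layer standing in for the ResNet body:

import numpy as np
import cntk as C

x32 = C.input_variable((3, 32, 32))
y32 = C.input_variable(10)
x16 = C.cast(x32, dtype=np.float16)                  # cast features into fp16
y16 = C.cast(y32, dtype=np.float16)                  # cast labels into fp16
with C.default_options(dtype=np.float16):
    z = C.layers.Dense(10)(x16)                      # fp16 network body (stand-in)
loss = C.cast(C.cross_entropy_with_softmax(z, y16),
              dtype=np.float32)                      # criterion back in fp32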
Example #10
def simple_mnist(tensorboard_logdir=None):
    input_dim = 19
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 1024

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)),
                    Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = r"."

    path = os.path.normpath(os.path.join(data_dir, "train.ctf"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature  : reader_train.streams.features,
        label  : reader_train.streams.labels
    }

    # Training config
    minibatch_size = 512
    num_samples_per_sweep = 1825000
    num_sweeps_to_train_with = 100

    # Instantiate progress writers.
    progress_writers = [ProgressPrinter(
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(0.001)
    learner = create_learner(model=z)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    num_minibatches_to_train = int(num_samples_per_sweep / minibatch_size * num_sweeps_to_train_with)

    model_dir = "model"
    for i in range(num_minibatches_to_train):
        mb = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

        freq = int(num_samples_per_sweep / minibatch_size)
        if i > 0 and i % freq == 0:
            timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
            current_trainer_cp = os.path.join(model_dir, timestamp + "_epoch_" + str(i // freq) + ".trainer")
            trainer.save_checkpoint(current_trainer_cp)

            train_error = get_error_rate(os.path.join(data_dir, "train_subset.ctf"), input_map, input_dim,
                                         num_output_classes, trainer)
            valid_error = get_error_rate(os.path.join(data_dir, "validation.ctf"), input_map, input_dim, num_output_classes,
                                         trainer)

            if tensorboard_writer is not None and train_error > 0:
                tensorboard_writer.write_value("train_error", train_error, i)
            if tensorboard_writer is not None and valid_error > 0:
                tensorboard_writer.write_value("valid_error", valid_error, i)

    feat_path = os.path.normpath(os.path.join(data_dir, "test.ctf"))
    return get_error_rate(feat_path, input_map, input_dim, num_output_classes, trainer)
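A companion note on the checkpoints written above: a saved trainer checkpoint can later be restored to resume training from the same state, for example

trainer.restore_from_checkpoint(current_trainer_cp)

Example #11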
def train_lm(training_file, epochs, max_num_minibatches):

    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(
        training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_parameter_schedule_per_sample(0.001)
    momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters,
        lr_per_sample,
        momentum_schedule,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size,
                                max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" %
          (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(b, minibatch_size, data, char_to_ix,
                                        vocab_dim)
            arguments = ({
                input_sequence: features,
                label_sequence: labels
            }, mask)
            mask = [False]
            trainer.train_minibatch(arguments)

            global_minibatch = e * minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
Example #12
def train_fast_rcnn(debug_output=False, model_path=model_file):
    if debug_output:
        print("Storing graphs and intermediate models to %s." %
              os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width,
                                        num_channels, num_classes, num_rois,
                                        base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes,
                                 model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

    # Instantiate the trainer object as default
    learner = momentum_sgd(frcn_output.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    # Wrap the learner for distributed training (this also works with a single worker)
    learner = distributed.data_parallel_distributed_learner(
        learner=learner,
        num_quantization_bits=num_quantization_bits,  # default is non-quantized gradient aggregation
        distributed_after=warm_up)  # samples after which training goes distributed (0 = no warm start)
    progress_printer = ProgressPrinter(tag='Training',
                                       num_epochs=max_epochs,
                                       rank=distributed.Communicator.rank())
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(
                min(mb_size * C.Communicator.num_workers(),
                    epoch_size - sample_count),
                input_map=input_map,
                num_data_partitions=C.Communicator.num_workers(),
                partition_index=C.Communicator.rank())
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(
                os.path.join(abs_path, "Output",
                             "frcn_py_%s.model" % (epoch + 1)))

    if distributed_flg:
        distributed.Communicator.finalize()

    return frcn_output
Example #13
def simple_mnist(tensorboard_logdir=None):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=relu)),
        Dense(num_output_classes)
    ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    path = os.path.normpath(os.path.join(data_dir,
                                         "Train-28x28_cntk_text.txt"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    #training_progress_output_freq = 100
    progress_writers = [
        ProgressPrinter(
            #freq=training_progress_output_freq,
            tag='Training',
            num_epochs=num_sweeps_to_train_with)
    ]

    if tensorboard_logdir is not None:
        progress_writers.append(
            TensorBoardProgressWriter(freq=10,
                                      log_dir=tensorboard_logdir,
                                      model=z))

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(1)
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr),
                      progress_writers)

    training_session(trainer=trainer,
                     mb_source=reader_train,
                     mb_size=minibatch_size,
                     model_inputs_to_streams=input_map,
                     max_samples=num_samples_per_sweep *
                     num_sweeps_to_train_with,
                     progress_frequency=num_samples_per_sweep).train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size,
                                        input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
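A sketch of the check_path helper this example assumes, failing early with a hint when the MNIST data files are missing:

import os

def check_path(path):
    if not os.path.exists(path):
        raise RuntimeError(
            "File '%s' does not exist. Run the MNIST data download script first." % path)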
Example #14
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases: print(p)
        print("others")
        for p in others: print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    if buffered_rpn_proposals is not None:
        proposal_provider = ProposalProvider.fromlist(buffered_rpn_proposals, requires_scaling=False)
    else:
        proposal_provider = None

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        num_classes=cfg["DATA"].NUM_CLASSES,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        proposal_provider=proposal_provider)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
    }
    if buffered_rpn_proposals is not None:
        input_map[od_minibatch_source.proposals_si] = rpn_rois_input
    else:
        input_map[od_minibatch_source.dims_si] = dims_input

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for epoch in range(epochs_to_train):       # loop over epochs
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
            data = od_minibatch_source.next_minibatch(min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            #progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            #if sample_count % 100 == 0:
            #    print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
Example #15
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for p in biases: print(p)
        print("others")
        for p in others: print(p)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    if buffered_rpn_proposals is not None:
        proposal_provider = ProposalProvider.fromlist(buffered_rpn_proposals, requires_scaling=False)
    else:
        proposal_provider = None

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        num_classes=cfg["DATA"].NUM_CLASSES,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        proposal_provider=proposal_provider)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
    }
    if buffered_rpn_proposals is not None:
        input_map[od_minibatch_source.proposals_si] = rpn_rois_input
    else:
        input_map[od_minibatch_source.dims_si] = dims_input

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for epoch in range(epochs_to_train):       # loop over epochs
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
            data = od_minibatch_source.next_minibatch(min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)
Example #16
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=relu)),
        Dense(num_output_classes)
    ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)
    dataDir = os.getcwd()

    path = os.path.normpath(os.path.join(dataDir, "Train-28x28_cntk_text.txt"))

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    #training_progress_output_freq = 100
    progress_writers = [
        ProgressPrinter(
            #freq=training_progress_output_freq,
            tag='Training',
            num_epochs=num_sweeps_to_train_with)
    ]

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(1)
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr),
                      progress_writers)

    training_session(trainer=trainer,
                     mb_source=reader_train,
                     mb_size=minibatch_size,
                     model_inputs_to_streams=input_map,
                     max_samples=num_samples_per_sweep *
                     num_sweeps_to_train_with,
                     progress_frequency=num_samples_per_sweep).train()

    # Load test data
    path = os.path.normpath(os.path.join(dataDir, "Test-28x28_cntk_text.txt"))

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    C.debugging.start_profiler()
    C.debugging.enable_profiler()
    C.debugging.set_node_timing(True)
    #C.cntk_py.disable_cpueval_optimization() # uncomment this to check CPU eval perf without optimization

    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size,
                                        input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    C.debugging.stop_profiler()
    trainer.print_node_timing()

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #17
def simple_mnist(tensorboard_logdir=None):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)),
                    Dense(num_output_classes)])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature  : reader_train.streams.features,
        label  : reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    #training_progress_output_freq = 100
    progress_writers = [ProgressPrinter(
        #freq=training_progress_output_freq,
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    if tensorboard_logdir is not None:
        progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(1)
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers)

    training_session(
        trainer=trainer,
        mb_source = reader_train,
        mb_size = minibatch_size,
        model_inputs_to_streams = input_map,
        max_samples = num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep
    ).train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature  : reader_test.streams.features,
        label  : reader_test.streams.labels
    }

    # Test data for trained model
    C.debugging.start_profiler()
    C.debugging.enable_profiler()
    C.debugging.set_node_timing(True)
    #C.cntk_py.disable_cpueval_optimization() # uncomment this to check CPU eval perf without optimization

    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    C.debugging.stop_profiler()
    trainer.print_node_timing()

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #18
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):

    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    learner = fsadagrad(model_train.parameters,
                        lr       = learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size=epoch_size),
                        momentum = momentum_schedule_per_sample(0.9990913221888589),
                        gradient_clipping_threshold_per_sample=2.3,
                        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
Example #19
def train_fast_rcnn(cfg):
    # Train only if no model exists yet
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)
    else:
        # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
        image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT,
                                            cfg.IMAGE_WIDTH),
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=cfg["MODEL"].FEATURE_NODE_NAME)
        roi_proposals = input_variable(
            (cfg.NUM_ROI_PROPOSALS, 4),
            dynamic_axes=[Axis.default_batch_axis()],
            name="roi_proposals")
        label_targets = input_variable(
            (cfg.NUM_ROI_PROPOSALS, cfg["DATA"].NUM_CLASSES),
            dynamic_axes=[Axis.default_batch_axis()])
        bbox_targets = input_variable(
            (cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
            dynamic_axes=[Axis.default_batch_axis()])
        bbox_inside_weights = input_variable(
            (cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
            dynamic_axes=[Axis.default_batch_axis()])

        # Instantiate the Fast R-CNN prediction model and loss function
        loss, pred_error = create_fast_rcnn_model(image_input, roi_proposals,
                                                  label_targets, bbox_targets,
                                                  bbox_inside_weights, cfg)
        if isinstance(loss, cntk.Variable):
            loss = combine([loss])

        if cfg["CNTK"].DEBUG_OUTPUT:
            print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
            plot(
                loss,
                os.path.join(cfg.OUTPUT_PATH,
                             "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE))

        # Set learning parameters
        lr_factor = cfg["CNTK"].LR_FACTOR
        lr_per_sample_scaled = [
            x * lr_factor for x in cfg["CNTK"].LR_PER_SAMPLE
        ]
        mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
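        # note: momentum is specified per minibatch (MOMENTUM_PER_MB), while the
        # learning rates above are per-sample values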
        l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
        epochs_to_train = cfg["CNTK"].MAX_EPOCHS

        print("Using base model:   {}".format(cfg["MODEL"].BASE_MODEL))
        print("lr_per_sample:      {}".format(lr_per_sample_scaled))

        # --- train ---
        # Instantiate the learners and the trainer object
        params = loss.parameters
        biases = [p for p in params if '.b' in p.name or 'b' == p.name]
        others = [p for p in params if p not in biases]
        bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT
        lr_schedule = learning_parameter_schedule_per_sample(
            lr_per_sample_scaled)
        learner = momentum_sgd(others,
                               lr_schedule,
                               mm_schedule,
                               l2_regularization_weight=l2_reg_weight,
                               unit_gain=False,
                               use_mean_gradient=True)

        bias_lr_per_sample = [
            v * bias_lr_mult for v in cfg["CNTK"].LR_PER_SAMPLE
        ]
        bias_lr_schedule = learning_parameter_schedule_per_sample(
            bias_lr_per_sample)
        bias_learner = momentum_sgd(biases,
                                    bias_lr_schedule,
                                    mm_schedule,
                                    l2_regularization_weight=l2_reg_weight,
                                    unit_gain=False,
                                    use_mean_gradient=True)
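        # a single Trainer can drive multiple learners over disjoint parameter
        # sets; here the biases train with their own, scaled learning rate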
        trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

        # Get minibatches of images and perform model training
        print("Training model for %s epochs." % epochs_to_train)
        log_number_of_parameters(loss)

        # Create the minibatch source
        if cfg.USE_PRECOMPUTED_PROPOSALS:
            proposal_provider = ProposalProvider.fromfile(
                cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE,
                cfg.NUM_ROI_PROPOSALS)
        else:
            proposal_provider = ProposalProvider.fromconfig(cfg)

        od_minibatch_source = ObjectDetectionMinibatchSource(
            cfg["DATA"].TRAIN_MAP_FILE,
            cfg["DATA"].TRAIN_ROI_FILE,
            max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
            pad_width=cfg.IMAGE_WIDTH,
            pad_height=cfg.IMAGE_HEIGHT,
            pad_value=cfg["MODEL"].IMG_PAD_COLOR,
            randomize=True,
            use_flipping=cfg["TRAIN"].USE_FLIPPED,
            max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
            num_classes=cfg["DATA"].NUM_CLASSES,
            proposal_provider=proposal_provider,
            provide_targets=True,
            proposal_iou_threshold=cfg.BBOX_THRESH,
            normalize_means=cfg.BBOX_NORMALIZE_MEANS if cfg.BBOX_NORMALIZE_TARGETS else None,
            normalize_stds=cfg.BBOX_NORMALIZE_STDS if cfg.BBOX_NORMALIZE_TARGETS else None)

        # define mapping from reader streams to network inputs
        input_map = {
            od_minibatch_source.image_si: image_input,
            od_minibatch_source.proposals_si: roi_proposals,
            od_minibatch_source.label_targets_si: label_targets,
            od_minibatch_source.bbox_targets_si: bbox_targets,
            od_minibatch_source.bbiw_si: bbox_inside_weights
        }

        progress_printer = ProgressPrinter(tag='Training',
                                           num_epochs=epochs_to_train,
                                           gen_heartbeat=True)
        for epoch in range(epochs_to_train):  # loop over epochs
            sample_count = 0
            while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
                data = od_minibatch_source.next_minibatch(
                    min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count),
                    input_map=input_map)

                trainer.train_minibatch(data)  # update model with it
                sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
                progress_printer.update_with_trainer(
                    trainer, with_metric=True)  # log progress
                if sample_count % 100 == 0:
                    print("Processed {} samples".format(sample_count))

            progress_printer.epoch_summary(with_metric=True)

        eval_model = create_fast_rcnn_eval_model(loss, image_input,
                                                 roi_proposals, cfg)
        eval_model.save(cfg['MODEL_PATH'])
        return eval_model
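
A minimal usage sketch of this entry point, modeled on the driver scripts in the
CNTK detection examples; get_configuration and prepare are assumed helpers that
assemble and finalize the cfg dictionary, and may be named differently:

# hypothetical driver; the helper names are assumptions, not part of the code above
cfg = get_configuration()             # merge detector, model and data-set configs
prepare(cfg, use_arg_parser=False)    # resolve paths, proposals and normalization
trained_model = train_fast_rcnn(cfg)  # trains, or loads an existing model in MAKE_MODE
print("Stored trained model at %s" % cfg['MODEL_PATH'])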