Example #1
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    if network['name'] == 'resnet20': 
        lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
    elif network['name'] == 'resnet110': 
        lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
    else: 
        raise RuntimeError("Unknown model name!")

    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    
    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)
    
    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
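
A minimal usage sketch for the function above (hypothetical values; the network dict keys follow the function body, and z, ce, pe and progress_printer are assumed to come from the surrounding ResNet example code):

# Hedged wiring sketch, not part of the original project
network = {'name': 'resnet20', 'output': z, 'ce': ce, 'pe': pe}
trainer = create_trainer(network, minibatch_size=128, epoch_size=50000,
                         num_quantization_bits=32, block_size=None,
                         warm_up=0, progress_printer=progress_printer)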
Example #2
def create_trainer(network, epoch_size, num_quantization_bits, block_size,
                   warm_up):
    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    local_learner = momentum_sgd(
        network['output'].parameters,
        lr_per_minibatch,
        momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner)
Example #3
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits):
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    momentum_time_constant = -minibatch_size / np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample,
                                         epoch_size=epoch_size,
                                         unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # learner object
    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_schedule,
                                 mm_schedule,
                                 unit_gain=True,
                                 l2_regularization_weight=l2_reg_weight)

    learner = data_parallel_distributed_learner(
        learner=local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)
    return Trainer(network['output'], (network['ce'], network['pe']), learner)
Example #4
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    lr_per_mb = [0.1] # [1.0]*30 + [0.1]*30 + [0.01]*20 + [0.001]
    l2_reg_weight = 0.0001

    # adjust LR with minibatch size
    #if minibatch_size != 256:
    #    for i in range(0, len(lr_per_mb)):
    #        lr_per_mb[i] *= minibatch_size / 256

    # Set learning parameters
    lr_schedule = learning_rate_schedule(lr_per_mb, epoch_size=epoch_size, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(0.9)

    local_learner = nesterov(network['output'].parameters, lr_schedule, mm_schedule,
                             l2_regularization_weight=l2_reg_weight)

    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner, num_quantization_bits=num_quantization_bits, distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['errs']), learner, progress_printer)
Example #5
File: io_tests.py  Project: pospanet/CNTK
def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    import cntk as C
    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim, ))
    label = C.input_variable(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer,
                               mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4,
                               max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
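
MyDataSource itself is defined elsewhere in io_tests.py. A minimal sketch of such a UserMinibatchSource subclass, assuming dense data and ignoring num_samples for brevity (the real test source honors it and uses sparse features), might look like:

import numpy as np
from cntk.core import Value
from cntk.io import UserMinibatchSource, StreamInformation, MinibatchData

class MyDataSource(UserMinibatchSource):
    def __init__(self, f_dim, l_dim):
        self.f_dim, self.l_dim = f_dim, l_dim
        # stream metadata that the input_map above binds against
        self.fsi = StreamInformation('features', 0, 'dense', np.float32, (f_dim,))
        self.lsi = StreamInformation('labels', 1, 'dense', np.float32, (l_dim,))
        super(MyDataSource, self).__init__()

    def stream_infos(self):
        return [self.fsi, self.lsi]

    def next_minibatch(self, num_samples, number_of_workers=1, worker_rank=0,
                       device=None):
        # one random length-1 sequence per call; a real source would keep a cursor
        f_data = Value(batch=np.random.rand(1, 1, self.f_dim).astype(np.float32))
        l_data = Value(batch=np.eye(self.l_dim, dtype=np.float32)[[0]])
        return {self.fsi: MinibatchData(f_data, 1, 1, False),
                self.lsi: MinibatchData(l_data, 1, 1, False)}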
Example #6
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query       = Input(input_dim,  is_sparse=False)  # TODO: make sparse once it works
    slot_labels = Input(num_labels, is_sparse=True)

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error      (z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    time_constant = minibatch_size / math.log(1/0.9)

    lr_per_sample = [0.003]*2+[0.0015]*12+[0.0003]

    # trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, units=epoch_size)
    learner = fsadagrad(z.parameters, lr_schedule, time_constant,
                        targetAdagradAvDenom=1, gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)

    trainer = Trainer(z, ce, pe, [learner])
    #_extend_Trainer(trainer)  # TODO: should be just baked in

    # define mapping from reader streams to network inputs
    input_map = {
        query       : reader.streams.query,
        slot_labels : reader.streams.slot_labels
    }

    # process minibatches and perform model training
    t = 0
    mbs = 0
    for epoch in range(max_epochs):
        loss_numer = 0  # TODO: find a nicer way of tracking, this is clumsy
        loss_denom = 0
        metric_numer = 0
        metric_denom = 0
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:
            # BUGBUG: RuntimeError: GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported
            #data, num_samples = next_minibatch(reader, min(minibatch_size, epoch_size-t), input_map)
            data = reader.next_minibatch(minibatch_size, input_map=input_map)
            if data is None:
                break
            trainer.train_minibatch(data)
            loss_numer += trainer.previous_minibatch_loss_average * trainer.previous_minibatch_sample_count  # too much code for something this simple
            loss_denom +=                                           trainer.previous_minibatch_sample_count
            metric_numer += trainer.previous_minibatch_evaluation_average * trainer.previous_minibatch_sample_count
            metric_denom +=                                                 trainer.previous_minibatch_sample_count
            print_training_progress(trainer, mbs if mbs > 10 else 0, num_mbs_to_show_result)
            t += data[slot_labels].num_samples
            mbs += 1
        print("--- EPOCH {} DONE: loss = {:0.6f} * {}, metric = {:0.1f}% * {} ---".format(epoch+1, loss_numer/loss_denom, loss_denom, metric_numer/metric_denom*100.0, metric_denom))

    return loss_numer/loss_denom, metric_numer/metric_denom
Example #7
def create_trainer(loss, pred_error, lr_per_sample, mm_schedule, l2_reg_weight,
                   epochs_to_train, cfg):

    # Set learning parameters
    if isinstance(loss, C.Variable):
        loss = C.combine([loss])

    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]

    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(others,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False,
                           use_mean_gradient=True)

    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample,
                                              unit=UnitType.sample)
    bias_learner = momentum_sgd(biases,
                                bias_lr_schedule,
                                mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False,
                                use_mean_gradient=True)

    return Trainer(None, (loss, pred_error), [learner, bias_learner])
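
A hypothetical invocation of the function above (the schedule values are illustrative; loss, pred_error and cfg come from the surrounding code, with cfg supplying CNTK.BIAS_LR_MULT):

mm_schedule = momentum_schedule(0.9)
trainer = create_trainer(loss, pred_error,
                         lr_per_sample=[0.001] * 10 + [0.0001] * 5,
                         mm_schedule=mm_schedule,
                         l2_reg_weight=0.0005,
                         epochs_to_train=15, cfg=cfg)

Passing the list [learner, bias_learner] to Trainer lets each learner update its own disjoint parameter group, which is how the bias parameters get their boosted learning rate.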
Example #8
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    lr_schedule = learning_rate_schedule([0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625],
                                         unit=UnitType.sample, epoch_size=epoch_size)
    mm_schedule = momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200],
                                                     epoch_size=epoch_size)
    learner = momentum_sgd(model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=0.002)
    
    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count)) # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features], criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples                     # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric # return values from last epoch
Example #9
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})
    
    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values, m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Example #10
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query       = Input(input_dim,  is_sparse=False)
    slot_labels = Input(num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error      (z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))  # TODO: Change to round number. This is 664.39. 700?

    lr_schedule = [0.003]*2+[0.0015]*12+[0.0003] # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)

    # trainer object
    lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size)
    learner = adam_sgd(z.parameters,
                       lr=lr_per_sample, momentum=momentum_time_constant,
                       low_memory=True,
                       gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)

    trainer = Trainer(z, ce, pe, [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query       : reader.streams.query,
        slot_labels : reader.streams.slot_labels
    }

    # process minibatches and perform model training
    log_number_of_parameters(z) ; print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training') # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):         # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:               # loop over minibatches on the epoch
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map) # fetch minibatch
            trainer.train_minibatch(data)                                   # update model with it
            t += trainer.previous_minibatch_sample_count                    # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print (name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric
Example #11
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    num_output_classes,
                                                    embedding_dim, hidden_dim,
                                                    cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')
    ], 0)

    features_si = mb_source.stream_info(features)
    labels_si = mb_source.stream_info(label)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0005)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            features: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1
Example #12
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
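
A sketch of how this Evaluator helper might be used, borrowing the tiny model from Example #9 (an assumption for illustration; the helper itself is model-agnostic):

import cntk as C

criterion = C.combine([ce, errs])    # pack loss and metric into one Function
evaluator = Evaluator(criterion)
# the dummy learner never updates parameters; test_minibatch only evaluates
avg_metric = evaluator.test_minibatch({features: [[2.]], labels: [[1.]]})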
Example #13
File: CifarResNet.py  Project: hintz/CNTK
def cifar_resnet():
    dev = 0
    cntk_dev = cntk_device(dev)
    epoch_size = sys.maxsize
    mbs = create_mb_source(epoch_size)
    stream_infos = mbs.stream_infos()
    for si in stream_infos:
        if si.m_name == 'features':
            features_si = si
        elif si.m_name == 'labels':
            labels_si = si

    image_shape = features_si.m_sample_layout.dimensions()
    image_shape = (image_shape[2], image_shape[0], image_shape[1])

    num_classes = labels_si.m_sample_layout.dimensions()[0]

    image_input = variable(image_shape,
                           features_si.m_element_type,
                           needs_gradient=False,
                           name="Images")
    classifier_output = resnet_classifer(image_input, num_classes, dev,
                                         "classifierOutput")
    label_var = variable((num_classes),
                         features_si.m_element_type,
                         needs_gradient=False,
                         name="Labels")

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    #TODO: add save and load module code
    image_classifier = combine([ce, pe, classifier_output], "ImageClassifier")

    lr = learning_rates_per_sample(0.0078125)

    mb_size = 32
    num_mbs = 1000

    trainer = Trainer(classifier_output, ce, pe,
                      [sgdlearner(classifier_output.owner.parameters(), lr)])

    for i in range(0, num_mbs):
        mb = mbs.get_next_minibatch(mb_size, cntk_dev)

        arguments = dict()
        arguments[image_input] = mb[features_si].m_data
        arguments[label_var] = mb[labels_si].m_data

        trainer.train_minibatch(arguments, cntk_dev)
        freq = 20
        if i % freq == 0:
            training_loss = get_train_loss(trainer)
            eval_crit = get_train_eval_criterion(trainer)
            print(
                "Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}"
                .format(i, training_loss, eval_crit))
Example #14
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_rate_schedule(1, UnitType.minibatch),
                                 momentum=momentum_as_time_constant_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
Example #15
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    num_output_classes,
                                                    embedding_dim, hidden_dim,
                                                    cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
Example #16
def finalize_network(reader, model_details, max_amount_of_epochs,
                     samples_per_epoch, samples_per_minibatch,
                     pixel_dimensions, classes, learning_rate):
    features = input_variable(shape=(pixel_dimensions['depth'],
                                     pixel_dimensions['height'],
                                     pixel_dimensions['width']))
    label = input_variable(shape=len(classes))

    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)

    model = create_tf_model(model_details,
                            num_classes=len(classes),
                            input_features=normalized_features,
                            freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)
    learner = momentum_sgd(parameters=model.parameters,
                           lr=learning_rate_schedule(learning_rate,
                                                     UnitType.minibatch),
                           momentum=0.9,
                           l2_regularization_weight=0.0005)

    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)

    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])

    log_number_of_parameters(model)

    map_input_to_streams_train = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    training_session(trainer=trainer,
                     mb_source=reader,
                     model_inputs_to_streams=map_input_to_streams_train,
                     mb_size=samples_per_minibatch,
                     progress_frequency=samples_per_epoch,
                     checkpoint_config=CheckpointConfig(
                         frequency=samples_per_epoch,
                         filename=os.path.join("./checkpoints",
                                               "ConvNet_Lego_VisiOn"),
                         restore=True)).train()
    network = {'features': features, 'label': label, 'model': softmax(model)}
    model_name = f"CNN-3200-224-resnet-18.model"
    export_path = os.path.abspath(
        os.path.join("..", "..", "Final models", "CNN", model_name))
    model.save(export_path)
    return network
Example #17
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    roi_input   = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
Example #18
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    rel_path = r"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.003125)
    trainer = Trainer(netout, ce, pe,
                      [sgd_learner(netout.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20
    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            input: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
Example #19
def train_model(debug_output=False):
    # Create the minibatch source
    minibatch_source = create_reader(map_file)

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable((num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    learner = momentum_sgd(model.parameters,
                           lr_schedule,
                           mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training image classifier for %s epochs." % max_epochs)
    log_number_of_parameters(model)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(
                min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        model.save(
            os.path.join(output_model_folder,
                         'withcrops_{}.dnn'.format(epoch + 1)))

    return
Example #20
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule_per_sample(0))
    return Trainer(model, (loss, metric), dummy_learner)
Example #21
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_rate_schedule(1, UnitType.minibatch),
                                 momentum=momentum_as_time_constant_schedule(0))
    return Trainer(model, (loss, metric), dummy_learner)
Example #22
def train_and_test(s2smodel, train_reader, test_reader, block_size,
                   num_quantization_bits, max_epochs, epoch_size,
                   minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(
                                               model_path,
                                               "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(source=test_reader,
                                        mb_size=minibatch_size)).train()
Example #23
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50
    epoch_size = sys.maxsize
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    lr = learning_rates_per_sample(0.02)
    input = variable((input_dim,), np.float32, needs_gradient=False, name="features")
    label = variable((num_output_classes,), np.float32, needs_gradient=False, name="labels")
    dev = -1
    cntk_dev = cntk_device(dev)
    netout = fully_connected_classifier_net(input, num_output_classes, hidden_layers_dim, num_hidden_layers, dev, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)
    #TODO: add save and load module code
    ffnet = combine([ce, pe, netout], "classifier_model")

    rel_path = r"../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    cm = create_text_mb_source(path, input_dim, num_output_classes, epoch_size)

    stream_infos = cm.stream_infos()

    for si in stream_infos:
        if si.m_name == 'features':
            features_si = si
        elif si.m_name == 'labels':
            labels_si = si

    trainer = Trainer(netout, ce, pe, [sgdlearner(netout.owner.parameters(), lr)])

    for i in range(0, int(num_minibatches_to_train)):
        mb = cm.get_next_minibatch(minibatch_size, cntk_dev)

        arguments = dict()
        arguments[input] = mb[features_si].m_data
        arguments[label] = mb[labels_si].m_data

        trainer.train_minibatch(arguments, cntk_dev)
        freq = 20
        if i % freq == 0:
            training_loss = get_train_loss(trainer)
            eval_crit = get_train_eval_criterion(trainer)
            print ("Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}".format(i, training_loss, eval_crit))
Example #24
def ffnet(debug_output=False):
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.02)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 60

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(0, int(num_minibatches_to_train)):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
        print_training_progress(trainer, i, training_progress_output_freq)

    test_features, test_labels = generate_random_data(minibatch_size,
                                                      input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({
        input: test_features,
        label: test_labels
    })
    return avg_error
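
generate_random_data is not shown in this snippet; a plausible helper in the spirit of CNTK's feed-forward example (a sketch, not necessarily the original project's version) is:

import numpy as np

def generate_random_data(sample_size, feature_dim, num_classes):
    # draw a class label per sample, then Gaussian features shifted by class
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    X = (np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)
    X = X.astype(np.float32)
    # expand the label column into one-hot rows
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y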
Example #25
File: simplernn.py  Project: zli12/CNTK
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(features,
                                                     num_output_classes,
                                                     embedding_dim, hidden_dim,
                                                     cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
Example #26
File: debug_test.py  Project: zli12/CNTK
def _train(z, loss, eval_error, f_input, l_input, num_output_classes, steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = C.learning_parameter_schedule(0.5)
    # apply the learning rate per minibatch, as in the literature, without CNTK's per-sample scaling:
    learner = sgd(z.parameters, lr_schedule, minibatch_size=C.learners.IGNORE)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        trainer.train_minibatch({f_input: features, l_input: labels})
Example #27
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    import cntk as C
    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # use a large learning rate so the model does not converge before all the intended samples are fed;
    # note that the training session can end early if there are no more updates
    lr_per_sample = learning_rate_schedule(0.3, UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config=C.CrossValidationConfig(minibatch_source=mbs_cv,
                                          max_samples=10,
                                          minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
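
MyDataSourceWithCheckpoint is defined elsewhere in the test file; a sketch of such a checkpointing subclass (the state payload here is arbitrary) could be:

class MyDataSourceWithCheckpoint(MyDataSource):
    def __init__(self, f_dim, l_dim):
        super(MyDataSourceWithCheckpoint, self).__init__(f_dim, l_dim)
        self._restore_from_checkpoint_calls = 0

    def get_checkpoint_state(self):
        # whatever dict is returned here is handed back on restore
        return {'position': 0}

    def restore_from_checkpoint(self, state):
        self._restore_from_checkpoint_calls += 1
        assert 'position' in state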
Example #28
def _train(z, loss, eval_error, f_input, l_input, num_output_classes, steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = learning_rate_schedule(0.5, UnitType.minibatch)

    learner = sgd(z.parameters, lr_schedule)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)

        trainer.train_minibatch({f_input: features, l_input: labels})
Example #29
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    with open(train_map_file) as f:
        epoch_size = sum(1 for line in f)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
Example #30
File: trainer1.py  Project: osmr/utct
    def _hyper_train_target_sub(self, **kwargs):
        """
        Actual training procedure for specific set of hyper parameters.
        """

        if self.saver.log_filename:
            fh = logging.FileHandler(self.saver.log_filename)
            self.logger.addHandler(fh)

        self.logger.info("Training with parameters: {}".format(kwargs))

        X_train, Y_train, X_val, Y_val = self.data_source(**kwargs)

        input_var, label_var, output = self.model(**kwargs)

        loss = cross_entropy_with_softmax(output, label_var)
        label_error = classification_error(output, label_var)

        learner = self.optimizer(
            parameters=output.parameters,
            momentum=0.9,
            **kwargs)

        progress_printer = ProgressPrinter(tag='Training', num_epochs=self.num_epoch)
        trainer = Trainer(output, (loss, label_error), [learner], [progress_printer])

        # input_map = {
        #     input_var: reader_train.streams.features,
        #     label_var: reader_train.streams.labels
        # }

        batch_size = self.data_source.batch_size
        num_minibatches_to_train = X_train.shape[0] // batch_size
        for i in range(num_minibatches_to_train):
            # slide over the training data, one minibatch per step
            features = X_train[i * batch_size:(i + 1) * batch_size]
            labels = Y_train[i * batch_size:(i + 1) * batch_size]
            trainer.train_minibatch({input_var: features, label_var: labels})

        if self.saver.log_filename:
            self.logger.removeHandler(fh)
            fh.close()

        best_value = 0.0

        return best_value