def implementing_1d_convnet_cntk():
    max_features = 10000  # number of words to consider as features
    max_len = 500  # cut texts after this number of words (among top max_features most common words)
    x_train, y_train, x_test, y_test = load_data(max_features, max_len)

    model = build_model_cntk(max_features, max_len)
    x = cntk.input_variable(shape=(max_len, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
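
Several of these examples call a cntk_train helper that is not shown here. The following is a minimal sketch of what such a helper might look like; the implementation, the assumption that x_train/y_train are NumPy arrays, and the array shapes are all assumptions, not code from the original examples.

import numpy as np

def cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator=None):
    # Iterate over the data in minibatches and feed each one to the CNTK trainer.
    # The arrays are assumed to already match the shapes of the declared input variables.
    num_samples = len(x_train)
    for epoch in range(max_epochs):
        for start in range(0, num_samples, batch_size):
            x_batch = np.asarray(x_train[start:start + batch_size], dtype=np.float32)
            y_batch = np.asarray(y_train[start:start + batch_size], dtype=np.float32)
            trainer.train_minibatch({x: x_batch, y: y_batch})
        # Summarize the epoch via any attached progress writers.
        trainer.summarize_training_progress()
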
def learning_word_embeddings_with_the_embedding_layer_cntk():
    x_train, y_train, x_test, y_test = load_from_files()

    max_features = 10000
    maxlen = 20
    embedding_dim = 8

    x = cntk.input_variable(shape=(maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=max_features, sparse_output=True)
    model = cntk.layers.Embedding(embedding_dim)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 30
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method = 'share',
        substitutions = {X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure the optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    G_learner = C.fsadagrad(
        parameters = X_fake.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters = D_real.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )

    DistG_learner = C.train.distributed.data_parallel_distributed_learner(G_learner)
    
    # The following API marks a learner as the metric aggregator, which is used by
    # the trainer to determine the training progress.
    # It is required only when more than one learner is provided to a *single* trainer.
    # In this example, we use two trainers, each with a single learner, so it
    # is not required and is automatically set by CNTK for each single learner. However, if you
    # plan to use both learners with a single trainer, then it needs to be called before
    # creating the trainer.
    #DistG_learner.set_as_metric_aggregator()

    DistD_learner = C.train.distributed.data_parallel_distributed_learner(D_learner)

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        DistG_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        DistD_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
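
A hedged sketch of how the objects returned by build_graph are typically consumed in an alternating GAN update loop. The minibatch source, its stream name, the noise distribution, and the function itself are assumptions rather than part of the original example.

import numpy as np
import cntk as C

def train_gan(reader, X_real, X_fake, Z, G_trainer, D_trainer,
              noise_shape=(100,), minibatch_size=128, num_minibatches=10000):
    def noise_sample(count):
        # Uniform noise in [-1, 1) matching the generator's input shape (assumption).
        return np.random.uniform(-1.0, 1.0, size=(count,) + noise_shape).astype(np.float32)

    for step in range(num_minibatches):
        # 1) Update the discriminator on a batch of real images plus freshly generated ones.
        mb = reader.next_minibatch(minibatch_size)
        D_trainer.train_minibatch({X_real: mb[reader.streams.features],
                                   Z: noise_sample(minibatch_size)})
        # 2) Update the generator; its loss only depends on the noise input.
        G_trainer.train_minibatch({Z: noise_sample(minibatch_size)})

    # Distributed learners require finalizing MPI communication at the end of the run.
    C.train.distributed.Communicator.finalize()
    return X_fake
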
def run_cntk():
    text, chars, char_indices, x_train, y_train = get_data(one_hot_encode_features=False)
    alphabet_size = len(chars)
    print('alphabet_size=', alphabet_size)
    model = build_model_cntk(alphabet_size=alphabet_size)
    model_filename = 'ch8-1_cntk.model'
    model.save(model_filename)
    model = None
    model = cntk.load_model(model_filename)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    y_oneHot = cntk.one_hot(y, num_classes=alphabet_size)
    loss_function = cntk.cross_entropy_with_softmax(model.output, y_oneHot)
    learner = cntk.adam(model.parameters, cntk.learning_parameter_schedule_per_sample(0.001), cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, loss_function), [learner],)

    for epoch in range(1, 60):
        print('epoch', epoch)
        cntk_train(x, y, x_train, y_train, max_epochs=32, batch_size=128, trainer=trainer)
        model_filename = 'final_ch8-1_cntk.model'
        model.save(model_filename)
        generate_text_cntk(char_indices, chars, model, text)
def test_noise_injection_with_checkpointing():
    from cntk import initializer
    shape = (100,100)

    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))

    lr=C.learning_parameter_schedule_per_sample(0.5)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        v =  np.float32(np.random.rand(100,100))

        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  C.learning_parameter_schedule_per_sample(lr_values, epoch_size=1),
                  C.momentum_schedule(m_values, epoch_size=1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Example #7
def train_lm(testing=False):
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used as the training criterion
    # error: this is a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z) ; print()
    
    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
    momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                            gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                            gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence : features, label_sequence : labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end =  timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples

            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
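
The average cross entropy returned by train_lm is the per-token negative log-likelihood in nats, so the usual language-model metric follows directly from it; a small sketch:

import math

def perplexity(average_cross_entropy):
    # Per-token perplexity corresponding to a per-token cross entropy measured in nats.
    return math.exp(average_cross_entropy)

# e.g. print("perplexity: %.2f" % perplexity(last_avg_ce))
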
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up, progress_writers):
    # Set learning parameters
    lr_per_sample     = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule       = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms               = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
    mm_schedule       = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
    l2_reg_weight     = 0.002

    # Create learner
    if block_size != None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = C.learners.momentum_sgd(network['output'].parameters,
                                            lr_schedule, mm_schedule,
                                            l2_regularization_weight=l2_reg_weight)

    if block_size != None:
        parameter_learner = C.train.distributed.block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(local_learner, 
                                                                                  num_quantization_bits=num_quantization_bits, 
                                                                                  distributed_after=warm_up)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']), parameter_learner, progress_writers)
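
A hedged usage sketch for the distributed trainer built above: drive it with a training session and finalize MPI communication afterwards. The network, reader, and input_map objects are assumed to be provided by the surrounding script, and the run would be launched with something like `mpiexec -n 4 python train.py`.

import cntk as C

def train_distributed(network, reader, input_map, epoch_size, minibatch_size=64, max_epochs=80):
    # Plain data-parallel SGD: no quantization, no block momentum, no warm-up (assumptions).
    trainer = create_trainer(network, epoch_size, num_quantization_bits=32,
                             block_size=None, warm_up=0, progress_writers=None)
    C.training_session(
        trainer=trainer,
        mb_source=reader,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        progress_frequency=epoch_size,
        max_samples=epoch_size * max_epochs
    ).train()
    # Workers must shut down MPI communication explicitly at the end of a distributed run.
    C.train.distributed.Communicator.finalize()
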
    def create_distributed_learner(self, mode, config):
        local_learner = C.sgd(self.z.parameters,
                              C.learning_parameter_schedule_per_sample(0.01))
        try:
            if mode == 'data_parallel':
                if config is None:
                    config = DataParallelConfig(num_quantization_bits=32,
                                                distributed_after=0)
                learner = C.data_parallel_distributed_learner(
                    local_learner,
                    num_quantization_bits=config.num_quantization_bits,
                    distributed_after=config.distributed_after)
            elif mode == 'block_momentum':
                if config is None:
                    # the default config to match data parallel SGD
                    config = BlockMomentumConfig(
                        block_momentum_as_time_constant=0,
                        block_learning_rate=1,
                        block_size=NUM_WORKERS,
                        distributed_after=0)
                learner = C.block_momentum_distributed_learner(
                    local_learner,
                    block_momentum_as_time_constant=config.block_momentum_as_time_constant,
                    block_learning_rate=config.block_learning_rate,
                    block_size=config.block_size,
                    distributed_after=config.distributed_after)
            else:
                learner = local_learner
        except RuntimeError:
            learner = None
        return learner
Example #10
    def create_trainer(self):
        try:
            p = self.output.parameters
            # Three of the four parameters are learned by the block_momentum_distributed_learner.
            bmd_learner = cntk.block_momentum_distributed_learner(
                cntk.momentum_sgd(
                    [p[0], p[1], p[2]],
                    cntk.learning_parameter_schedule(0.0001),
                    cntk.momentum_as_time_constant_schedule(1000)),
                block_size=1000,
                block_learning_rate=0.01,
                block_momentum_as_time_constant=1000)

            # New API to mark which learner is to be used for metric aggregation.
            bmd_learner.set_as_metric_aggregator()

            # The last parameter is learned by the data_parallel_distributed_learner.
            momentum_schedule = cntk.momentum_schedule_per_sample(
                0.9990913221888589)
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            dpd_learner = cntk.data_parallel_distributed_learner(
                cntk.momentum_sgd([p[3]], lr_per_sample, momentum_schedule,
                                  True))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [bmd_learner, dpd_learner], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
def test_session_progress_print_on_sweep_unit(tmpdir, device_id):
    device = cntk_device(device_id)
    writer = MockProgressWriter()
    # set a higher learning rate; we don't need to converge, just to go through all the samples
    t, feature, label = create_sample_model(device, writer, lr_per_sample=C.learning_parameter_schedule_per_sample(0.3))
    mbs = mb_source(tmpdir, "training",
                    #max_samples=INFINITELY_REPEAT,
                    max_sweeps = 4)

    input_map = {
        feature: mbs.streams.features,
        label: mbs.streams.labels
    }

    test_dir = str(tmpdir)

    C.training_session(
        trainer=t, mb_source=mbs,
        mb_size=C.minibatch_size_schedule(5),
        model_inputs_to_streams=input_map, max_samples=FULL_DATA_SWEEP,
        progress_frequency=(2, C.train.DataUnit.sweep)
    ).train(device)
    #4 sweeps of 25 samples = 100 samples
    assert(t.total_number_of_samples_seen == 100)
    # output every 2 sweeps; with 4 sweeps in total, 2 summary outputs are written:
    assert(writer.training_summary_counter == 2)
Example #12
def test_session_progress_print_on_sweep_unit(tmpdir, device_id):
    device = cntk_device(device_id)
    writer = MockProgressWriter()
    # set a higher learning rate; we don't need to converge, just to go through all the samples
    t, feature, label = create_sample_model(
        device,
        writer,
        lr_per_sample=C.learning_parameter_schedule_per_sample(0.3))
    mbs = mb_source(
        tmpdir,
        "training",
        #max_samples=INFINITELY_REPEAT,
        max_sweeps=4)

    input_map = {feature: mbs.streams.features, label: mbs.streams.labels}

    test_dir = str(tmpdir)

    C.training_session(
        trainer=t,
        mb_source=mbs,
        mb_size=C.minibatch_size_schedule(5),
        model_inputs_to_streams=input_map,
        max_samples=FULL_DATA_SWEEP,
        progress_frequency=(2, C.train.DataUnit.sweep)).train(device)
    #4 sweeps of 25 samples = 100 samples
    assert (t.total_number_of_samples_seen == 100)
    # output every 2 sweeps; with 4 sweeps in total, 2 summary outputs are written:
    assert (writer.training_summary_counter == 2)
Example #13
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.sequence.input_variable(((2 * context + 1) * feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.fsadagrad(
        z.parameters,
        lr=C.learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
        momentum=C.momentum_schedule_per_sample(0.9990913221888589),
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
def adjust_lr_callback(index, average_error, cv_num_samples, cv_num_minibatches):
    global prev_metric
    if (prev_metric - average_error) / prev_metric < 0.05: # relative improvement must be at least 5%
        learner.reset_learning_rate(C.learning_parameter_schedule_per_sample(learner.learning_rate() / 2))
        if learner.learning_rate() < lr_per_sample / (2**7-0.1): # we are done after the 6-th LR cut
            print("Learning rate {} too small. Training complete.".format(learner.learning_rate()))
            return False # means we are done
        print("Improvement of metric from {:.3f} to {:.3f} insufficient. Halving learning rate to {}.".format(prev_metric, average_error, learner.learning_rate()))
    prev_metric = average_error
    return True # means continue
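
A hedged sketch of how a callback with this signature is typically wired into a training session via the cross-validation configuration. The trainer, train_source, cv_source, input_map, and epoch_size objects are assumed to exist in the surrounding script, as are the learner and lr_per_sample globals referenced by the callback, and prev_metric must be initialized before the first callback fires.

import cntk as C

prev_metric = 1e30  # large initial value so the first cross-validation pass never triggers an LR cut

C.training_session(
    trainer=trainer,
    mb_source=train_source,
    mb_size=64,
    model_inputs_to_streams=input_map,
    progress_frequency=epoch_size,
    cv_config=C.CrossValidationConfig(minibatch_source=cv_source,
                                      frequency=epoch_size,
                                      minibatch_size=256,
                                      callback=adjust_lr_callback)
).train()
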
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
Example #16
def create_learner(model):
    '''Create the optimization method (learner)'''
    optim = "momentum_sgd"
    lr = 0.001
    lr_per_sample = C.learning_parameter_schedule_per_sample(lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if optim == 'momentum_sgd':
        clipping_threshold_per_sample = 5.0
        gradient_clipping_with_truncation = True
        return C.momentum_sgd(model.parameters, lr_per_sample, momentum_schedule,
                              gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                              gradient_clipping_with_truncation=gradient_clipping_with_truncation)
Example #17
def train_mse_cntk(x, y, model, train_gen, val_gen, epochs, val_steps):
    loss_function = cntk.squared_error(model, y)
    accuracy_function = loss_function
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.001),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner])
    evaluator = cntk.Evaluator(accuracy_function)

    history = fit_generator(x,
                            y,
                            model=model,
                            trainer=trainer,
                            evaluator=evaluator,
                            train_gen=train_gen,
                            steps_per_epoch=500,
                            epochs=epochs,
                            val_gen=val_gen,
                            validation_steps=val_steps)

    plot_results(history)
Example #18
def create_sample_model(device,
                        writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample(
                            [0.3, 0.2, 0.1, 0.0])):
    in1 = sequence.input_variable(shape=(input_dim, ))
    labels = sequence.input_variable(shape=(input_dim, ))
    p = parameter(shape=(input_dim, ), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
def use_glove_word_embeddings_cntk(preload_weights=False):
    tokenizer, x_train, y_train, x_val, y_val = from_raw_text_to_word_embeddings(
    )

    x = cntk.input_variable(shape=(Constants.maxlen, ), dtype=np.float32)
    y = cntk.input_variable(shape=(1, ), dtype=np.float32)
    model = cntk.one_hot(x,
                         num_classes=Constants.max_words,
                         sparse_output=True)
    if preload_weights is True:
        embedding_matrix = compute_embedding_matrix(tokenizer)
        assert (Constants.embedding_dim
                == embedding_matrix.shape[0]) or (Constants.embedding_dim
                                                  == embedding_matrix.shape[1])
        model = cntk.layers.Embedding(weights=embedding_matrix)(model)
    else:
        model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Dense(32, activation=cntk.relu)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training',
                                                    num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer,
               evaluator)
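
compute_embedding_matrix is not shown above. The following is a minimal sketch of what it might do, assuming a Keras-style tokenizer with a word_index dict, a local copy of the GloVe vectors (the file name is an assumption), and the Constants values used above.

import numpy as np

def compute_embedding_matrix(tokenizer):
    # Parse the pre-trained GloVe vectors into a word -> vector dict (file name assumed).
    embeddings_index = {}
    with open('glove.6B.100d.txt', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:], dtype=np.float32)

    # Rows are word indices, columns are embedding dimensions; words missing from
    # GloVe (or beyond max_words) stay all-zero.
    embedding_matrix = np.zeros((Constants.max_words, Constants.embedding_dim), dtype=np.float32)
    for word, i in tokenizer.word_index.items():
        if i < Constants.max_words:
            vector = embeddings_index.get(word)
            if vector is not None:
                embedding_matrix[i] = vector
    return embedding_matrix
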
    def create_trainer(self):
        try:
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            learner = cntk.data_parallel_distributed_learner(
                cntk.sgd(self.output.parameters, lr_per_sample))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [learner], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
    def create_trainer(use_sparse, device):
        a = C.sequence.input_variable(shape=input_shape,
                                      is_sparse=use_sparse,
                                      name='input')
        w = C.parameter(init=w_init, device=dev)
        z = times(a, w)

        l = C.sequence.input_variable(shape=label_shape,
                                      is_sparse=use_sparse,
                                      name='label')
        loss = cross_entropy_with_softmax(z, l, axis=-1)
        trainer = C.Trainer(
            z, (loss, None),
            C.sgd(z.parameters,
                  lr=C.learning_parameter_schedule_per_sample(0.7)))
        return (a, l, w, trainer)
Example #22
def train_sequence_classifier():
    input_dim = 2000
    hidden_dim = 25
    embedding_dim = 50
    num_classes = 5

    # Input variables denoting the features and label data
    features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
    label = C.input_variable(num_classes)

    # Instantiate the sequence classification model
    classifier_output = lstm_sequence_classifier(features, num_classes,
                                                 embedding_dim, hidden_dim)

    ce = C.cross_entropy_with_softmax(classifier_output, label)
    pe = C.classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)

    # Instantiate the trainer object to drive the model training
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample),
                        progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
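
create_reader is not shown here. A sketch of what it typically looks like for this example, assuming the CTF file stores the sparse word ids in a field named 'x' and the one-hot label in a field named 'y' (the field names are assumptions about Train.ctf):

import cntk as C

def create_reader(path, is_training, input_dim, label_dim):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='x', shape=input_dim, is_sparse=True),
            labels=C.io.StreamDef(field='y', shape=label_dim, is_sparse=False))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)
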
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.sequence.input_variable(((2*context+1)*feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error    (z, labels)

    learner = C.fsadagrad(z.parameters,
                          lr=C.learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
                          momentum=C.momentum_schedule_per_sample(0.9990913221888589),
                          gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
    def create_distributed_learner(self, mode, config):
        local_learner = C.sgd(self.z.parameters, C.learning_parameter_schedule_per_sample(0.01))
        try:
            if mode == 'data_parallel':
                if config is None:
                    config = DataParallelConfig(num_quantization_bits=32, distributed_after=0)
                learner = C.data_parallel_distributed_learner(local_learner, num_quantization_bits=config.num_quantization_bits, distributed_after=config.distributed_after)
            elif mode == 'block_momentum':
                if config is None:
                    # the default config to match data parallel SGD
                    config = BlockMomentumConfig(block_momentum_as_time_constant=0, block_learning_rate=1, block_size=NUM_WORKERS, distributed_after=0)
                learner = C.block_momentum_distributed_learner(local_learner, block_momentum_as_time_constant=config.block_momentum_as_time_constant, block_learning_rate=config.block_learning_rate, block_size=config.block_size, distributed_after=config.distributed_after)
            else:
                learner = local_learner
        except RuntimeError:
            learner = None
        return learner
Example #25
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
            training_session, times

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # use a large learning rate to keep the model from converging before all the intended samples are fed
    # note that the training session can end early if there are no updates
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
Example #26
def test_restore_constants(tmpdir):
    C.device.try_set_default_device(C.device.cpu())

    def _setvalue(x, v):
        x.value = 0 * x.value + v if len(x.shape) > 0 else np.array(
            v, dtype=np.float32)

    def _setall(f, v):
        for x in f.constants + f.parameters:
            _setvalue(x, v)

    def _checkall(f, v):
        for x in f.constants + f.parameters:
            assert (x.value == v).all()

    x = C.input_variable(10)
    f = C.layers.BatchNormalization()(x)
    trainer = C.Trainer(
        f, C.reduce_sum(f),
        C.sgd(f.parameters, C.learning_parameter_schedule_per_sample(0.1)))

    model_filename = str(tmpdir / 'function.out')
    checkpoint_filename = str(tmpdir / 'checkpoint.out')
    _setall(f, 1)
    f.save(model_filename)
    _checkall(f, 1)

    _setall(f, 2)
    trainer.save_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    _setall(f, 3)
    _checkall(f, 3)
    trainer.restore_from_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    f2 = C.Function.load(model_filename)
    _checkall(f2, 1)

    _setall(f, 4)
    _checkall(f, 4)
    f.restore(model_filename)
    _checkall(f, 1)

    _setall(f2, 5)
    _checkall(f2, 5)
Example #27
def create_trainer():
    masked_dec = dec * C.ops.clip(C.ops.argmax(y), 0, 1)
    loss, label_error = criterion(masked_dec, y)
    loss *= C.ops.clip(C.ops.argmax(y), 0, 1)

    lr_schedule = C.learning_parameter_schedule_per_sample([1e-3] * 2 + [5e-4] * 2 + [1e-4], epoch_size=int(epoch_size))
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    C.logging.log_number_of_parameters(dec)
    return trainer
def train_sequence_classifier():
    input_dim = 2000
    hidden_dim = 25
    embedding_dim = 50
    num_classes = 5

    # Input variables denoting the features and label data
    features = C.sequence.input_variable(shape=input_dim, is_sparse=True)
    label = C.input_variable(num_classes)

    # Instantiate the sequence classification model
    classifier_output = lstm_sequence_classifier(features, num_classes, embedding_dim, hidden_dim)

    ce = C.cross_entropy_with_softmax(classifier_output, label)
    pe = C.classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_classes)

    input_map = {
        features : reader.streams.features,
        label    : reader.streams.labels
    }

    lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)

    # Instantiate the trainer object to drive the model training
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(classifier_output, (ce, pe),
                        C.sgd(classifier_output.parameters, lr=lr_per_sample),
                        progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def adjust_lr_callback(index, average_error, cv_num_samples,
                       cv_num_minibatches):
    global prev_metric
    if (
            prev_metric - average_error
    ) / prev_metric < 0.05:  # relative improvement must be at least 5%
        learner.reset_learning_rate(
            C.learning_parameter_schedule_per_sample(learner.learning_rate() /
                                                     2))
        if learner.learning_rate() < lr_per_sample / (
                2**7 - 0.1):  # we are done after the 6-th LR cut
            print("Learning rate {} too small. Training complete.".format(
                learner.learning_rate()))
            return False  # means we are done
        print(
            "Improvement of metric from {:.3f} to {:.3f} insufficient. Halving learning rate to {}."
            .format(prev_metric, average_error, learner.learning_rate()))
    prev_metric = average_error
    return True  # means continue
Example #30
def test_restore_constants(tmpdir):
    C.device.try_set_default_device(C.device.cpu())
    def _setvalue(x, v):
        x.value = 0 * x.value + v if len(x.shape)> 0 else np.array(v, dtype=np.float32)

    def _setall(f, v):
        for x in f.constants + f.parameters:
            _setvalue(x, v)

    def _checkall(f, v):
        for x in f.constants + f.parameters:
            assert (x.value == v).all()

    x = C.input_variable(10)
    f = C.layers.BatchNormalization()(x)
    trainer = C.Trainer(f, C.reduce_sum(f), C.sgd(f.parameters, C.learning_parameter_schedule_per_sample(0.1)))

    model_filename = str(tmpdir / 'function.out')
    checkpoint_filename = str(tmpdir / 'checkpoint.out')
    _setall(f, 1)
    f.save(model_filename)
    _checkall(f, 1)

    _setall(f, 2)
    trainer.save_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    _setall(f, 3)
    _checkall(f, 3)
    trainer.restore_from_checkpoint(checkpoint_filename)
    _checkall(f, 2)

    f2 = C.Function.load(model_filename)
    _checkall(f2, 1)

    _setall(f, 4)
    _checkall(f, 4)
    f.restore(model_filename)
    _checkall(f, 1)

    _setall(f2, 5)
    _checkall(f2, 5)
Example #31
    def Loss(self):
        # Evaluating old actions and values :
        logprobs, state_value, dist_entropy = self.policy.evaluate()

        # Finding the ratio (pi_theta / pi_theta__old): # (importance sampling)
        c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
        ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

        c_rewards = C.input_variable(1, name='rewards')
        advantages = c_rewards - C.stop_gradient(state_value)

        # Finding Surrogate Loss:
        surr1 = ratios * advantages
        surr2 = C.clip(ratios, 1 - self.eps_clip,
                       1 + self.eps_clip) * advantages
        neglog_loss = -C.element_min(surr1, surr2)
        entropy_loss = -0.01 * dist_entropy
        actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
        critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
        loss = actor_loss + critic_loss

        chunk = {
            'neglog_loss': neglog_loss,
            'entropy_loss': entropy_loss,
            'actor_loss': actor_loss,
            'critic_loss': critic_loss
        }

        trainer = C.Trainer(
            loss, (loss, None),
            C.adam(loss.parameters,
                   C.learning_parameter_schedule_per_sample(self.lr),
                   C.momentum_schedule_per_sample(self.betas[0]),
                   variance_momentum=C.momentum_schedule_per_sample(
                       self.betas[1])))
        # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10), C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999))) # higher learning rate

        return loss, chunk, trainer
Example #32
def create_trainer(network, epoch_size, num_quantization_bits, block_size,
                   warm_up, progress_writers):
    # Set learning parameters
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    mms = [0] * 20 + [0.9983347214509387] * 20 + [0.9991670137924583]
    mm_schedule = C.learners.momentum_schedule_per_sample(
        mms, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # Create learner
    if block_size != None and num_quantization_bits != 32:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    local_learner = C.learners.momentum_sgd(
        network['output'].parameters,
        lr_schedule,
        mm_schedule,
        l2_regularization_weight=l2_reg_weight)

    if block_size != None:
        parameter_learner = C.train.distributed.block_momentum_distributed_learner(
            local_learner, block_size=block_size)
    else:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    # Create trainer
    return C.Trainer(network['output'], (network['ce'], network['pe']),
                     parameter_learner, progress_writers)
    def create_trainer(use_sparse, device):
        a = C.sequence.input_variable(shape=input_shape,
                                      is_sparse=use_sparse,
                                      name='input')
        w_i = C.parameter(init=w_init_i, device=dev)
        a_projection = times(a, w_i)

        p_o = C.placeholder()
        h = C.sequence.past_value(p_o)
        w_h = C.parameter(init=w_init_h, device=dev)
        h_projection = times(h, w_h)
        z = a_projection + h_projection
        z = z.replace_placeholder(z)
        z = reshape(z, label_shape)

        l = C.sequence.input_variable(shape=label_shape,
                                      is_sparse=use_sparse,
                                      name='label')
        loss = cross_entropy_with_softmax(z, l, axis=-1)
        trainer = C.Trainer(
            z, (loss, None),
            C.sgd(z.parameters,
                  lr=C.learning_parameter_schedule_per_sample(0.7)))
        return (a, l, w_i, w_h, trainer)
    def create_trainer(self):
        try:
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            p = self.output.parameters
            # Three of the four parameters are learned by the first data_parallel_distributed_learner.
            learner1 = cntk.data_parallel_distributed_learner(
                cntk.sgd([p[0], p[1], p[2]], lr_per_sample))

            # New API to mark which learner is to be used for metric aggregation.
            learner1.set_as_metric_aggregator()

            # The last parameter is learned by another data_parallel_distributed_learner.
            learner2 = cntk.data_parallel_distributed_learner(
                cntk.sgd([p[3]], lr_per_sample))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [learner1, learner2], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
Example #35
def convnetlrn_cifar10_dataaug(reader_train,
                               reader_test,
                               epoch_size=50000,
                               max_epochs=80):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # input normalization 1/256 = 0.00390625
    scaled_input = C.element_times(C.constant(0.00390625), input_var)
    f = GlobalAveragePooling()
    f.update_signature((1, 8, 8))

    with C.layers.default_options():
        z = C.layers.Sequential([
            C.layers.For(
                range(1), lambda: [
                    C.layers.Convolution2D(
                        (3, 3), 32, strides=(1, 1), pad=True),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 64, strides=(1, 1), pad=False),
                    C.layers.MaxPooling((3, 3), strides=(2, 2), pad=True),
                    C.layers.Dropout(0.5)
                ]),
            C.layers.For(
                range(1), lambda: [
                    C.layers.Convolution2D(
                        (3, 3), 128, strides=(1, 1), pad=True),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 160, strides=(1, 1), pad=False),
                    C.layers.Activation(activation=C.relu),
                    C.layers.MaxPooling((3, 3), strides=(2, 2), pad=True),
                    C.layers.Dropout(0.5)
                ]),
            C.layers.For(
                range(1), lambda: [
                    C.layers.Convolution2D(
                        (3, 3), 192, strides=(1, 1), pad=True),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 256, strides=(1, 1), pad=False),
                    C.layers.Activation(activation=C.relu),
                    C.layers.Convolution2D(
                        (1, 1), 10, strides=(1, 1), pad=False),
                    C.layers.Activation(activation=C.relu),
                    C.layers.AveragePooling((8, 8), strides=(1, 1), pad=False)
                ])
        ])(scaled_input)

    print('z.shape', z.shape)
    z = C.flatten(z)

    print('z.shape now', z.shape)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    minibatch_size = 64
    # Set learning parameters
    # learning rate
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    # momentum
    mms = [0] * 20 + [0.9983347214509387] * 20 + [0.9991670137924583]
    mm_schedule = C.learners.momentum_schedule_per_sample(
        mms, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = C.learners.momentum_sgd(z.parameters,
                                      lr_schedule,
                                      mm_schedule,
                                      unit_gain=True,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()
    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0

        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

    # save model
    modelname = "NIN_test1.dnn"
    z.save(os.path.join(model_path, modelname))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16
    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        sample_count += current_minibatch
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
Example #36
def train_and_test(reader_train, reader_test, model_func):

    ###############################################
    # Training the model
    ###############################################

    # Instantiate the input and the label variables
    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    # Create the model function
    model = model_func(input)

    # The labels for this network are the same as the input MNIST images.
    # Note: inside the model we scale the input to the 0-1 range,
    # hence we rescale the label to the same range.
    # We show how one can use a custom loss function:
    # loss = -(y * log(p) + (1 - y) * log(1 - p)) where p = model output and y = target
    # Since the input is normalized to 0-1, we scale the target to the same range.

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    # training config
    epoch_size = 30000  # 30000 samples is half the dataset size
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    # Instantiate the trainer object to drive the model training
    lr_per_sample = [0.00003]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    # Momentum which is applied on every minibatch_size = 64 samples
    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    # We use a variant of the Adam optimizer which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    # Instantiate the trainer
    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # Map the data streams to the input and labels.
    # Note: for autoencoders input == label
    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        # Read a mini batch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        # Run the trainer and perform model training
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100.0) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a test file reader to read data different from a training data
    #############################################################################

    # Test data for trained model
    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):

        # We are loading test data in batches specified by test_minibatch_size
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        eval_error = trainer.test_minibatch(data)

        # accumulate the evaluation error weighted by the minibatch size
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size

    # Average of evaluation errors of all test minibatches
    test_error = (metric_numer * 100.0) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
Example #37
    G_fake = dcgan_generator(z)
    D_real = dcgan_discriminator(x_real)
    D_fake = D_real.clone(method="share",
                          substitutions={x_real.output: G_fake.output})

    #
    # loss function
    #
    G_loss = -C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    #
    # optimizer
    #
    G_learner = C.adam(G_fake.parameters,
                       lr=C.learning_parameter_schedule_per_sample(1e-4),
                       momentum=0.5,
                       gradient_clipping_threshold_per_sample=minibatch_size,
                       gradient_clipping_with_truncation=True)
    D_learner = C.adam(D_real.parameters,
                       lr=C.learning_parameter_schedule_per_sample(1e-4),
                       momentum=0.5,
                       gradient_clipping_threshold_per_sample=minibatch_size,
                       gradient_clipping_with_truncation=True)
    G_progress_printer = C.logging.ProgressPrinter(tag="Generator")
    D_progress_printer = C.logging.ProgressPrinter(tag="Discriminator")

    if not os.path.exists("./dcgan_image"):
        os.mkdir("./dcgan_image")

    G_trainer = C.Trainer(G_fake, (G_loss, None), [G_learner], G_progress_printer)
Example #38
def convnet_mnist(debug_output=False, epoch_size=60000, minibatch_size=64, max_epochs=40):
    image_height = 64
    image_width  = 64
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 4

    # Input variables denoting the features and label data
    input_var = C.ops.input_variable((num_channels, image_height, image_width), np.float32)
    label_var = C.ops.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = C.ops.element_times(C.ops.constant(0.00390625), input_var)

    with C.layers.default_options(activation=C.ops.relu, pad=False):
        conv1 = C.layers.Convolution2D((5,5), 32, pad=True)(scaled_input)
        pool1 = C.layers.MaxPooling((3,3), (2,2))(conv1)
        conv2 = C.layers.Convolution2D((3,3), 48)(pool1)
        pool2 = C.layers.MaxPooling((3,3), (2,2))(conv2)
        conv3 = C.layers.Convolution2D((3,3), 64)(pool2)
        f4    = C.layers.Dense(96)(conv3)
        drop4 = C.layers.Dropout(0.5)(f4)
        z     = C.layers.Dense(num_output_classes, activation=None)(drop4)

    ce = C.losses.cross_entropy_with_softmax(z, label_var)
    pe = C.metrics.classification_error(z, label_var)

    reader_train = create_reader(os.path.join(data_path, 'Data-train-15000_20180720_070615.txt'), True, input_dim, num_output_classes)

    # Set learning parameters
    lr_per_sample    = [0.001]*10 + [0.0005]*10 + [0.0001]
    lr_schedule      = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms = [0]*5 + [0.9990239141819757]
    mm_schedule      = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var : reader_train.streams.features,
        label_var : reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += data[label_var].num_samples                     # count samples processed so far

        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch)))
    
    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Data-test-5000_20180720_070615.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var : reader_test.streams.features,
        label_var : reader_test.streams.labels
    }

    # Test data for trained model
    epoch_size = 5000
    minibatch_size = 250

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Accumulate the evaluation error, weighted by the minibatch size
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
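
# The convnet examples in this listing call a create_reader() helper that is
# not shown. A plausible CTF-based definition, reconstructed from how it is
# called above (the stream field names and the signature are assumptions; the
# distributed example further below passes an extra sample-limit argument):
def create_reader_sketch(path, is_training, input_dim, num_label_classes):
    ctf = C.io.CTFDeserializer(path, C.io.StreamDefs(
        features=C.io.StreamDef(field='features', shape=input_dim),
        labels=C.io.StreamDef(field='labels', shape=num_label_classes)))
    return C.io.MinibatchSource(
        ctf,
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)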

# Example #39

def convnet_mnist(debug_output=False, epoch_size=60000, minibatch_size=64, max_epochs=40):
    image_height = 28
    image_width  = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = C.ops.input_variable((num_channels, image_height, image_width), np.float32)
    label_var = C.ops.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = C.ops.element_times(C.ops.constant(0.00390625), input_var)

    with C.layers.default_options(activation=C.ops.relu, pad=False):
        conv1 = C.layers.Convolution2D((5,5), 32, pad=True)(scaled_input)
        pool1 = C.layers.MaxPooling((3,3), (2,2))(conv1)
        conv2 = C.layers.Convolution2D((3,3), 48)(pool1)
        pool2 = C.layers.MaxPooling((3,3), (2,2))(conv2)
        conv3 = C.layers.Convolution2D((3,3), 64)(pool2)
        f4    = C.layers.Dense(96)(conv3)
        drop4 = C.layers.Dropout(0.5)(f4)
        z     = C.layers.Dense(num_output_classes, activation=None)(drop4)

    ce = C.losses.cross_entropy_with_softmax(z, label_var)
    pe = C.metrics.classification_error(z, label_var)

    reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes)

    # Set learning parameters
    lr_per_sample    = [0.001]*10 + [0.0005]*10 + [0.0001]
    lr_schedule      = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms = [0]*5 + [0.9990239141819757]
    mm_schedule      = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var : reader_train.streams.features,
        label_var : reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += data[label_var].num_samples                     # count samples processed so far

        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch)))
    
    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var : reader_test.streams.features,
        label_var : reader_test.streams.labels
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 1024

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Accumulate the evaluation error, weighted by the minibatch size
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
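
# Hypothetical entry point for the 28x28 MNIST variant above; `data_path` and
# `model_path` are assumed to be module-level globals in the original script.
def run_convnet_mnist_sketch():
    test_error = convnet_mnist(max_epochs=5)
    print("Final average test error: {:0.2f}%".format(test_error * 100.0))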

# Example #40

def convnetlrn_cifar10_dataaug(reader_train,
                               reader_test,
                               epoch_size=50000,
                               max_epochs=80):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), input_var)

    with C.layers.default_options(activation=C.relu, pad=True):
        z = C.layers.Sequential([
            C.layers.For(
                range(2), lambda: [
                    C.layers.Convolution2D((3, 3), 64),
                    C.layers.Convolution2D((3, 3), 64),
                    LocalResponseNormalization(1.0, 4, 0.001, 0.75),
                    C.layers.MaxPooling((3, 3), (2, 2))
                ]),
            C.layers.For(
                range(2), lambda i:
                [C.layers.Dense([256, 128][i]),
                 C.layers.Dropout(0.5)]),
            C.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    mms = [0] * 20 + [0.9983347214509387] * 20 + [0.9991670137924583]
    mm_schedule = C.learners.momentum_schedule_per_sample(
        mms, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = C.learners.momentum_sgd(z.parameters,
                                      lr_schedule,
                                      mm_schedule,
                                      unit_gain=True,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()

    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        z.save(
            os.path.join(model_path,
                         "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        # Accumulate the evaluation error, weighted by the minibatch size
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
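
# LocalResponseNormalization() is used above but not defined in this listing.
# The CNTK ConvNet samples define it roughly as below (reproduced here as a
# sketch; double-check against the official example before relying on it):
# a filter averages the squared activations of 2*n+1 neighbouring channels,
# and the input is divided by (k + alpha/(2n+1) * sum)^beta.
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # insert a fake singleton reduction dimension after the channel axis
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    # convolve along the channel axis only; the fake dimension is not reduced
    y = C.convolution(W, x2s)
    # remove the fake singleton dimension again
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    return C.element_divide(x, den)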

# Example #41

def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height       = train_reader.height
    image_width        = train_reader.width
    num_channels       = train_reader.channel_count
    sequence_length    = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network 
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options (activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda : [
                C.layers.Dense(1024), 
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)
    
    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size     = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample          = [0.01]*10+[0.001]*10+[0.0001]
    lr_schedule            = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=train_epoch_size)
    momentum_per_sample = 0.9997558891748972
    mm_schedule            = C.momentum_schedule_per_sample([momentum_per_sample])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size     = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0

    test_reader.reset()    
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # Accumulate the evaluation error, weighted by the minibatch size
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of minibatches processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
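
# conv3d_ucf11() above drives a custom reader object rather than a CNTK
# MinibatchSource. The interface it relies on, inferred purely from the calls
# made in that function (everything here is an illustrative stub; a real
# implementation would decode the UCF-11 clips into numpy arrays):
class VideoReaderSketch(object):
    def __init__(self, videos, labels, height, width, channel_count,
                 sequence_length, label_count):
        self.height = height
        self.width = width
        self.channel_count = channel_count
        self.sequence_length = sequence_length
        self.label_count = label_count
        self._videos = videos
        self._labels = labels
        self._cursor = 0

    def size(self):
        return len(self._videos)

    def reset(self):
        self._cursor = 0

    def has_more(self):
        return self._cursor < len(self._videos)

    def next_minibatch(self, minibatch_size):
        end = min(self._cursor + minibatch_size, len(self._videos))
        videos = self._videos[self._cursor:end]
        labels = self._labels[self._cursor:end]
        count = end - self._cursor
        self._cursor = end
        return videos, labels, count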

# Example #42

def test_learner_empy_parameters_list():
    lr_per_sample = C.learning_parameter_schedule_per_sample(0.1)
    with pytest.raises(ValueError):
        learner = C.sgd([], lr_per_sample)

# Example #43

def test_sweep_based_schedule(tmpdir, device_id):
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    ctf_data = '''\
0   |S0 3:1   |S1 3:1 |# <s>
0   |S0 4:1 |# A    |S1 32:1 |# ~AH
0   |S0 5:1 |# B    |S1 36:1 |# ~B
0   |S0 4:1 |# A    |S1 31:1 |# ~AE
0   |S0 7:1 |# D    |S1 38:1 |# ~D
0   |S0 12:1 |# I   |S1 47:1 |# ~IY
0   |S0 1:1 |# </s> |S1 1:1 |# </s>
2   |S0 60:1 |# <s> |S1 3:1 |# <s>
2   |S0 61:1 |# A   |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features  = StreamDef(field='S0', shape=input_dim,  is_sparse=True),
        labels    = StreamDef(field='S1', shape=input_dim,  is_sparse=True)
    )), randomize=False)

    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {
        in1       : mbs.streams.features,
        labels : mbs.streams.labels
    }

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0

# Example #44

def train_and_test(reader_train, reader_test, model_func):

    ###############################
    # Training the model
    ###############################

    input = C.input_variable(input_dim)
    label = C.input_variable(input_dim)

    model = model_func(input)

    target = label / 255.0
    loss = -(target * C.log(model) + (1 - target) * C.log(1 - model))
    label_error = C.classification_error(model, target)

    epoch_size = 30000
    minibatch_size = 64
    num_sweeps_to_train_with = 5 if isFast else 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) // minibatch_size

    lr_per_sample = [3e-4]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size)

    momentum_schedule = C.momentum_schedule(0.9126265014311797, minibatch_size)

    learner = C.fsadagrad(model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule)

    progress_printer = C.logging.ProgressPrinter(0)
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.features
    }

    aggregate_metric = 0
    for i in range(num_minibatches_to_train):
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        samples = trainer.previous_minibatch_sample_count
        aggregate_metric += trainer.previous_minibatch_evaluation_average * samples

    train_error = (aggregate_metric *
                   100) / (trainer.total_number_of_samples_seen)
    print("Average training error: {0:0.2f}%".format(train_error))

    #############################################################################
    # Testing the model
    # Note: we use a separate test reader so the evaluation data differs from the training data
    #############################################################################

    test_minibatch_size = 32
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0

    # Test error metric calculation
    metric_numer = 0
    metric_denom = 0

    test_input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.features
    }

    for i in range(0, int(num_minibatches_to_test)):
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        metric_numer += np.abs(eval_error * test_minibatch_size)
        metric_denom += test_minibatch_size
    test_error = (metric_numer * 100) / (metric_denom)
    print("Average test error: {0:0.2f}%".format(test_error))

    return model, train_error, test_error
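
# train_and_test() expects a model_func that maps raw 784-dimensional pixel
# inputs to reconstructions in (0, 1), since the loss takes log(model) and
# log(1 - model). A minimal dense autoencoder in that spirit (layer sizes and
# the internal 1/255 scaling are assumptions; `input_dim` is the module-level
# global that train_and_test also uses):
def simple_autoencoder_sketch(features, encoding_dim=32):
    with C.layers.default_options(init=C.glorot_uniform()):
        scaled = features / 255.0
        encode = C.layers.Dense(encoding_dim, activation=C.relu)(scaled)
        decode = C.layers.Dense(input_dim, activation=C.sigmoid)(encode)
    return decode

# e.g. model, train_err, test_err = train_and_test(reader_train, reader_test,
#                                                  simple_autoencoder_sketch)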
def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs = 80):
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), input_var)

    with C.layers.default_options (activation=C.relu, pad=True):
        z = C.layers.Sequential([
            C.layers.For(range(2), lambda : [
                C.layers.Convolution2D((3,3), 64),
                C.layers.Convolution2D((3,3), 64),
                LocalResponseNormalization (1.0, 4, 0.001, 0.75),
                C.layers.MaxPooling((3,3), (2,2))
            ]),
            C.layers.For(range(2), lambda i: [
                C.layers.Dense([256,128][i]),
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample          = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule            = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms                    = [0]*20 + [0.9983347214509387]*20 + [0.9991670137924583]
    mm_schedule            = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)
    l2_reg_weight          = 0.002

    # trainer object
    learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                        unit_gain = True,
                                        l2_regularization_weight = l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z) ; print()

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far

        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size     = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch the next test minibatch
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Accumulate the evaluation error, weighted by the minibatch size
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
    results = re.findall("Completed successfully.", str_out)
    if len(results) != 2:
        print(str_out)
        assert False

if __name__=='__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    lr_per_sample = C.learning_parameter_schedule_per_sample(0.007)
    dist_learners = [
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_schedule, True)),
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_schedule, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output

    def check_samples(learners, expected_number_of_samples):
        for learner in learners:
            if learner.total_number_of_samples_seen != expected_number_of_samples:
                print("Completed with exception.")
def convnet_mnist(max_epochs, output_dir, data_dir, debug_output=False, epoch_size=60000, minibatch_size=64):
    """Creates and trains a feedforward classification model for MNIST images."""
    image_height = 28
    image_width = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = C.ops.input_variable((num_channels, image_height, image_width), np.float32)
    label_var = C.ops.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = C.ops.element_times(C.ops.constant(0.00390625), input_var)

    with C.layers.default_options(activation=C.ops.relu, pad=False):
        conv1 = C.layers.Convolution2D((5, 5), 32, pad=True)(scaled_input)
        pool1 = C.layers.MaxPooling((3, 3), (2, 2))(conv1)
        conv2 = C.layers.Convolution2D((3, 3), 48)(pool1)
        pool2 = C.layers.MaxPooling((3, 3), (2, 2))(conv2)
        conv3 = C.layers.Convolution2D((3, 3), 64)(pool2)
        f4 = C.layers.Dense(96)(conv3)
        drop4 = C.layers.Dropout(0.5)(f4)
        z = C.layers.Dense(num_output_classes, activation=None)(drop4)

    ce = C.losses.cross_entropy_with_softmax(z, label_var)
    pe = C.metrics.classification_error(z, label_var)

    # Load train data
    reader_train = create_reader(os.path.join(data_dir, 'Train-28x28_cntk_text.txt'), True,
                                 input_dim, num_output_classes, max_epochs * epoch_size)
    # Load test data
    reader_test = create_reader(os.path.join(data_dir, 'Test-28x28_cntk_text.txt'), False,
                                input_dim, num_output_classes, C.io.FULL_DATA_SWEEP)

    # Set learning parameters
    lr_per_sample = [0.001] * 10 + [0.0005] * 10 + [0.0001]
    lr_schedule = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mms = [0] * 5 + [0.9990239141819757]
    mm_schedule = C.learners.momentum_schedule_per_sample(mms, epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    local_learner = C.learners.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    progress_printer = C.logging.ProgressPrinter(
        tag='Training',
        rank=C.train.distributed.Communicator.rank(),
        num_epochs=max_epochs,
    )

    learner = C.train.distributed.data_parallel_distributed_learner(local_learner)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map_train = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    input_map_test = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()

    C.train.training_session(
        trainer=trainer,
        mb_source=reader_train,
        model_inputs_to_streams=input_map_train,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(output_dir, "ConvNet_MNIST")),
        test_config=TestConfig(reader_test, minibatch_size=minibatch_size,
                               model_inputs_to_streams=input_map_test)
    ).train()

    return
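
# Hypothetical driver for the distributed MNIST trainer above. When launched
# under mpiexec each rank runs the same code, and the MPI communicator should
# be shut down once training finishes; paths are placeholders.
def run_distributed_mnist_sketch(data_dir, output_dir, epochs=5):
    convnet_mnist(max_epochs=epochs, output_dir=output_dir, data_dir=data_dir)
    C.train.distributed.Communicator.finalize()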
def test_lattice_deserializer(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_dir = ''
    if 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ:
        data_dir = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
    else:
        print('CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY environment variable is not defined')

    print(data_dir)
    data_dir = os.path.join(data_dir, "Speech", "AN4Corpus", "v0")
    os.chdir(data_dir)
    feature_dimension = 33
    feature = C.sequence.input_variable(feature_dimension)

    label_dimension = 133
    label = C.sequence.input_variable(label_dimension)

    axis_lattice = C.Axis.new_unique_dynamic_axis('lattice_axis')
    lattice = C.sequence.input_variable(1, sequence_axis=axis_lattice)

    train_feature_filepath = os.path.join(data_dir,"glob_0000.scp")
    train_label_filepath = os.path.join(data_dir,"glob_0000.mlf")
    train_lattice_index_path = os.path.join(data_dir,"latticeIndex.txt")
    mapping_filepath = os.path.join(data_dir,"state.list")
    train_feature_stream = C.io.HTKFeatureDeserializer(
        C.io.StreamDefs(speech_feature=C.io.StreamDef(shape=feature_dimension, scp=train_feature_filepath)))
    train_label_stream = C.io.HTKMLFDeserializer(
        mapping_filepath,
        C.io.StreamDefs(speech_label=C.io.StreamDef(shape=label_dimension, mlf=train_label_filepath)),
        True)
    train_lattice_stream = C.io.LatticeDeserializer(
        train_lattice_index_path,
        C.io.StreamDefs(speech_lattice=C.io.StreamDef()))
    train_data_reader = C.io.MinibatchSource(
        [train_feature_stream, train_label_stream, train_lattice_stream],
        frame_mode=False)
    train_input_map = {
        feature: train_data_reader.streams.speech_feature,
        label: train_data_reader.streams.speech_label,
        lattice: train_data_reader.streams.speech_lattice
    }

    feature_mean = np.fromfile(os.path.join("GlobalStats", "mean.363"), dtype=float, count=feature_dimension)
    feature_inverse_stddev = np.fromfile(os.path.join("GlobalStats", "var.363"), dtype=float, count=feature_dimension)

    feature_normalized = (feature - feature_mean) * feature_inverse_stddev

    with C.default_options(activation=C.sigmoid):
        z = C.layers.Sequential([
            C.layers.For(range(3), lambda: C.layers.Recurrence(C.layers.LSTM(1024))),
            C.layers.Dense(label_dimension)
        ])(feature_normalized)
    mbsize = 1024
    mbs_per_epoch = 10
    max_epochs = 2

    symListPath = os.path.join(data_dir,"CY2SCH010061231_1369712653.numden.lats.symlist")
    phonePath = os.path.join(data_dir,"model.overalltying")
    stateListPath = os.path.join(data_dir,"state.list")
    transProbPath = os.path.join(data_dir,"model.transprob")

    criteria = C.lattice_sequence_with_softmax(label, z, z, lattice, symListPath, phonePath, stateListPath, transProbPath)
    err = C.classification_error(label,z)
    lr = C.learning_parameter_schedule_per_sample([(3, .01), (1,.001)])
    mm = C.momentum_schedule([(1000, 0.9), (0, 0.99)], mbsize)
    learner = C.momentum_sgd(z.parameters, lr, mm)
    trainer = C.Trainer(z, (criteria, err), learner)

    C.logging.log_number_of_parameters(z)
    progress_printer = C.logging.progress_print.ProgressPrinter(tag='Training', num_epochs = max_epochs)


    for epoch in range(max_epochs):
        for mb in range(mbs_per_epoch):
            minibatch = train_data_reader.next_minibatch(mbsize, input_map = train_input_map)
            trainer.train_minibatch(minibatch)
            progress_printer.update_with_trainer(trainer, with_metric = True)

        progress_printer.epoch_summary(with_metric = True)

    assert np.allclose(trainer.previous_minibatch_evaluation_average, 0.15064, atol=TOLERANCE_ABSOLUTE)
    assert np.allclose(trainer.previous_minibatch_loss_average, 0.035923, atol=TOLERANCE_ABSOLUTE)
    assert (trainer.previous_minibatch_sample_count == 218)
    assert (trainer.total_number_of_samples_seen == 5750)
    print("Completed successfully.")

# Example #49

def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)
    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_test(s2smodel)
    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005
    learner = C.fsadagrad(model_train.parameters,
                          #apply the learning rate as if it is a minibatch of size 1
                          lr = C.learning_parameter_schedule_per_sample([lr]*2+[lr/2]*3+[lr/4], epoch_size),
                          momentum = C.momentum_schedule(0.9366416204111472, minibatch_size=minibatch_size),
                          gradient_clipping_threshold_per_sample=2.3,
                          gradient_clipping_with_truncation=True)
    trainer = C.Trainer(None, criterion, learner)
    
    # records
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    C.logging.log_number_of_parameters(model_train) ; print()
    progress_printer = C.logging.ProgressPrinter(freq=30, tag='Training')

    # a hack to allow us to print sparse vectors
    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)

            # do the training
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # visualizing attention window
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    model_path = "model_%d.cmf" % epoch
    print("Saving final model to '%s'" % model_path)
    s2smodel.save(model_path)
    print("%d epochs complete." % max_epochs)