def build_graph(noise_shape, image_shape, G_progress_printer, D_progress_printer):
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method = 'share',
        substitutions = {X_real_scaled.output: X_fake.output}
    )

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))
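    # Note: the constant 1.0 in G_loss has zero gradient, so minimizing it is equivalent
    # to the non-saturating generator objective -log(D(G(z))); D_loss is the standard
    # discriminator cross-entropy -(log D(x) + log(1 - D(G(z)))).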

    G_learner = C.fsadagrad(
        parameters = X_fake.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters = D_real.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )

    DistG_learner = C.train.distributed.data_parallel_distributed_learner(G_learner)
    
    # The following API marks a learner as the metric aggregator, which is used by
    # the trainer to determine the training progress.
    # It is required only when more than one learner is provided to a *single* trainer.
    # In this example we use two trainers, each with a single learner, so it is not
    # required; CNTK sets it automatically for each single learner. However, if you
    # plan to use both learners with a single trainer, it needs to be called before
    # creating the trainer.
    #DistG_learner.set_as_metric_aggregator()

    DistD_learner = C.train.distributed.data_parallel_distributed_learner(D_learner)
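    # A minimal sketch (hypothetical; not how this example trains) of the single-trainer
    # case described in the comment above, where the aggregator must be marked before
    # the trainer is created:
    #   DistG_learner.set_as_metric_aggregator()
    #   combined_trainer = C.Trainer(X_fake, (G_loss, None),
    #                                [DistG_learner, DistD_learner], G_progress_printer)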

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        DistG_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        DistD_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
Example #2
def train_lm(testing=False):
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: the training criterion
    # error: a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z) ; print()
    
    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
    momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                            gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                            gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence : features, label_sequence : labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples

            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
Example #3
    def create_trainer(self):
        try:
            p = self.output.parameters
            # Three of four parameters are learned by block_momentum_distributed_learner.
            bmd_learner = cntk.block_momentum_distributed_learner(
                cntk.momentum_sgd(
                    [p[0], p[1], p[2]],
                    cntk.learning_parameter_schedule(0.0001),
                    cntk.momentum_as_time_constant_schedule(1000)),
                block_size=1000,
                block_learning_rate=0.01,
                block_momentum_as_time_constant=1000)
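            # Block momentum (BMUF): workers run local SGD on the three parameters above
            # and synchronise every block_size samples, applying the block learning rate
            # and block momentum to the aggregated update.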

            # New API to mark which learner is to be used for metric aggregation.
            bmd_learner.set_as_metric_aggregator()

            # The last parameter is learned by the data_parallel_distributed_learner.
            momentum_schedule = cntk.momentum_schedule_per_sample(
                0.9990913221888589)
            lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.007)
            dpd_learner = cntk.data_parallel_distributed_learner(
                cntk.momentum_sgd([p[3]], lr_per_sample, momentum_schedule,
                                  True))

            comm_rank = cntk.distributed.Communicator.rank()
            self.trainer = cntk.Trainer(
                self.output, (self.ce, self.err), [bmd_learner, dpd_learner], [
                    cntk.logging.ProgressPrinter(
                        freq=progress_freq, tag="Training", rank=comm_rank)
                ])
        except RuntimeError:
            self.trainer = None
        return
Example #4
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))
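    # context=(2, 2) prepends and appends two neighbouring frames to each feature vector,
    # so the network sees (2*context + 1) * feature_dim = 165 values per frame, which is
    # why the input variable below is declared with that shape.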

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.sequence.input_variable(((2 * context + 1) * feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.fsadagrad(
        z.parameters,
        lr=C.learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
        momentum=C.momentum_schedule_per_sample(0.9990913221888589),
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
Example #5
def create_learner(model):
    '''Create the optimization method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #6
def create_learner(model):
    '''Create the optimization method'''
    optim = "momentum_sgd"
    lr = 0.001
    lr_per_sample = C.learning_parameter_schedule_per_sample(lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if optim == 'momentum_sgd':
        clipping_threshold_per_sample = 5.0
        gradient_clipping_with_truncation = True
        return C.momentum_sgd(model.parameters, lr_per_sample, momentum_schedule,
                              gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                              gradient_clipping_with_truncation=gradient_clipping_with_truncation)
Example #7
    def Loss(self):
        # Evaluating old actions and values :
        logprobs, state_value, dist_entropy = self.policy.evaluate()

        # Finding the ratio (pi_theta / pi_theta__old): # (importance sampling)
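        # exp(log pi - log pi_old) gives the probability ratio; stop_gradient keeps the
        # old log-probabilities fixed so no gradient flows through them.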
        c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
        ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

        c_rewards = C.input_variable(1, name='rewards')
        advantages = c_rewards - C.stop_gradient(state_value)

        # Finding Surrogate Loss:
        surr1 = ratios * advantages
        surr2 = C.clip(ratios, 1 - self.eps_clip,
                       1 + self.eps_clip) * advantages
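        # PPO's pessimistic bound: take the elementwise minimum of the unclipped and
        # clipped surrogate objectives, then negate it so the trainer can minimize it.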
        neglog_loss = -C.element_min(surr1, surr2)
        entropy_loss = -0.01 * dist_entropy
        actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
        critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
        loss = actor_loss + critic_loss

        chunk = {
            'neglog_loss': neglog_loss,
            'entropy_loss': entropy_loss,
            'actor_loss': actor_loss,
            'critic_loss': critic_loss
        }

        trainer = C.Trainer(
            loss, (loss, None),
            C.adam(loss.parameters,
                   C.learning_parameter_schedule_per_sample(self.lr),
                   C.momentum_schedule_per_sample(self.betas[0]),
                   variance_momentum=C.momentum_schedule_per_sample(
                       self.betas[1])))
        # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10), C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999))) # higher learning rate

        return loss, chunk, trainer
def create_learner(model):
    '''Create the optimization method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters,
                      lr=lr_per_minibatch,
                      momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
Example #9
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.sequence.input_variable(((2*context+1)*feature_dim))
    labels = C.sequence.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.fsadagrad(z.parameters,
                          lr=C.learning_parameter_schedule_per_sample(lr, epoch_size=epoch_size),
                          momentum=C.momentum_schedule_per_sample(0.9990913221888589),
                          gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    progress_printer = C.logging.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
Example #10
def train_lm(testing=False):
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: the training criterion
    # error: a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence,
                                           data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
    momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters,
        lr_schedule,
        momentum_schedule,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(
                train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence: features, label_sequence: labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples

            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence,
                                              label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples,
                               t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
    results = re.findall("Completed successfully.", str_out)
    if len(results) != 2:
        print(str_out)
        assert False

if __name__ == '__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    lr_per_sample = C.learning_parameter_schedule_per_sample(0.007)
    dist_learners = [
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p1], lr_per_sample, momentum_schedule, True)),
        C.distributed.data_parallel_distributed_learner(C.momentum_sgd([p2], lr_per_sample, momentum_schedule, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output

    def check_samples(learners, expected_number_of_samples):
        for learner in learners:
            if learner.total_number_of_samples_seen != expected_number_of_samples:
Example #12
def build_graph(noise_shape, image_shape,
                G_progress_printer, D_progress_printer):
    '''
    The rest of the computational graph is mostly responsible for
    coordinating the training algorithms and parameter updates,
    which is particularly tricky with GANs for a couple of reasons.

    First, the discriminator must be used on both the real MNIST
    images and fake images generated by the generator function.
    One way to represent this in the computational graph is to
    create a clone of the output of the discriminator function,
    but with substituted inputs. Setting method='share' in the
    clone function ensures that both paths through the
    discriminator model use the same set of parameters.

    Second, we need to update the parameters of the generator
    and discriminator models separately using the gradients from
    different loss functions. We can get the parameters of a
    Function in the graph with the parameters attribute. However,
    when updating the model parameters, we update only the parameters
    of the respective model while keeping the other parameters
    unchanged. In other words, when updating the generator we
    update only the parameters of the G function while keeping
    the parameters of the D function fixed, and vice versa.
    '''
    input_dynamic_axes = [C.Axis.default_batch_axis()]
    Z = C.input_variable(noise_shape, dynamic_axes=input_dynamic_axes)
    X_real = C.input_variable(image_shape, dynamic_axes=input_dynamic_axes)
    X_real_scaled = 2*(X_real / 255.0) - 1.0

    # Create the model function for the generator and discriminator models
    X_fake = generator(Z)
    D_real = discriminator(X_real_scaled)
    D_fake = D_real.clone(
        method = 'share',
        substitutions = {X_real_scaled.output: X_fake.output}
    )
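    # With method='share' the clone reuses the discriminator's Parameter objects, so
    # gradients computed through D_fake update the same weights as D_real; 'clone' would
    # copy the parameters and 'freeze' would copy them as constants.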

    # Create loss functions and configure optimization algorithms
    G_loss = 1.0 - C.log(D_fake)
    D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

    G_learner = C.fsadagrad(
        parameters = X_fake.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )
    D_learner = C.fsadagrad(
        parameters = D_real.parameters,
        lr = C.learning_parameter_schedule_per_sample(lr),
        momentum = C.momentum_schedule_per_sample(0.9985724484938566)
    )

    # Instantiate the trainers
    G_trainer = C.Trainer(
        X_fake,
        (G_loss, None),
        G_learner,
        G_progress_printer
    )
    D_trainer = C.Trainer(
        D_real,
        (D_loss, None),
        D_learner,
        D_progress_printer
    )

    return X_real, X_fake, Z, G_trainer, D_trainer
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable(
        (num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options(activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3, 3, 3), 64, pad=True),
            C.layers.MaxPooling((1, 2, 2), (1, 2, 2)),
            C.layers.For(
                range(3), lambda i: [
                    C.layers.Convolution3D(
                        (3, 3, 3), [96, 128, 128][i], pad=True),
                    C.layers.Convolution3D(
                        (3, 3, 3), [96, 128, 128][i], pad=True),
                    C.layers.MaxPooling((2, 2, 2), (2, 2, 2))
                ]),
            C.layers.For(range(2),
                         lambda: [C.layers.Dense(1024),
                                  C.layers.Dropout(0.5)]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=train_epoch_size)
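    # With epoch_size=train_epoch_size each list entry applies for one epoch:
    # 0.01 for epochs 1-10, 0.001 for epochs 11-20, then 0.0001 for the rest.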
    momentum_per_sample = 0.9997558891748972
    mm_schedule = C.momentum_schedule_per_sample([momentum_per_sample])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(
                train_minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(
            test_minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({
            input_var: videos,
            label_var: labels
        }) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
Example #14
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height       = train_reader.height
    image_width        = train_reader.width
    num_channels       = train_reader.channel_count
    sequence_length    = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network 
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options (activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda : [
                C.layers.Dense(1024), 
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)
    
    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size     = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters
    lr_per_sample          = [0.01]*10+[0.001]*10+[0.0001]
    lr_schedule            = C.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=train_epoch_size)
    momentum_per_sample = 0.9997558891748972
    mm_schedule            = C.momentum_schedule_per_sample([momentum_per_sample])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        train_reader.reset()

        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})

        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size     = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0

    test_reader.reset()    
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Example #15
    results = re.findall("Completed successfully.", str_out)
    if len(results) != 2:
        print(str_out)
        assert False


if __name__ == '__main__':
    in1 = C.input_variable(shape=1)
    labels = C.input_variable(shape=1)
    p1 = parameter(shape=1)
    p2 = parameter(shape=1)
    n = plus(in1, p1, name='n')
    z = plus(n, p2, name='z')
    ce = squared_error(z, labels)

    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    lr_per_sample = C.learning_parameter_schedule_per_sample(0.007)
    learner1 = C.distributed.data_parallel_distributed_learner(
        C.momentum_sgd([p1], lr_per_sample, momentum_schedule, True))
    learner1.set_as_metric_aggregator()
    dist_learners = [
        learner1,
        C.distributed.data_parallel_distributed_learner(
            C.momentum_sgd([p2], lr_per_sample, momentum_schedule, True))
    ]

    trainer = C.Trainer(z, ce, dist_learners)
    in1_value = [[1]]
    label_value = [[0]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
def trainNetwork():

    mapper, gens = loadData(dir + fileName,
                            './data/Shakespeare',
                            batchSize,
                            timeSteps,
                            timeShift,
                            load=False,
                            lineShape=(0, 40000))

    # Input with dynamic sequence axis
    # consisting of a matrix of [steps-in-time X number-of-possible-characters]
    inputSeqAxis = cntk.Axis('inputAxis')
    input = cntk.sequence.input_variable((timeSteps, mapper.numClasses),
                                         sequence_axis=inputSeqAxis,
                                         name='input')

    model = createNetwork(input, layers, mapper.numClasses)

    label = cntk.sequence.input_variable(mapper.numClasses,
                                         sequence_axis=inputSeqAxis,
                                         name='label')

    z = model(input)
    loss = cntk.cross_entropy_with_softmax(z, label)
    error = cntk.classification_error(z, label)

    printer = cntk.logging.ProgressPrinter(tag='Training',
                                           freq=100,
                                           num_epochs=maxEpochs)

    lr_per_sample = cntk.learning_parameter_schedule_per_sample(0.001)
    momentum_schedule = cntk.momentum_schedule_per_sample(0.9990913221888589)
    learner = cntk.momentum_sgd(z.parameters,
                                lr_per_sample,
                                momentum_schedule,
                                gradient_clipping_threshold_per_sample=5.0,
                                gradient_clipping_with_truncation=True)

    #learner = cntk.momentum_sgd(z.parameters, lr, 0.9, minibatch_size=batchSize)
    #learner = cntk.fsadagrad(model.parameters, lr=lr, minibatch_size=batchSize, momentum=0.9, unit_gain=True)
    trainer = cntk.Trainer(z, (loss, error), learner, [printer])

    numMinibatch = mapper.samples // batchSize

    print("Input sequence length: {}; unique characters {};".format(
        timeSteps, mapper.numClasses))
    cntk.logging.log_number_of_parameters(z)
    print("Datset size {}; {} Epochs; {} minibatches per epoch".format(
        mapper.samples, maxEpochs, numMinibatch))

    for epoch in range(maxEpochs):
        mask = [True]
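        # Passing arguments as (data, flags): True marks the first minibatch of the epoch
        # as the start of new sequences; later minibatches pass False so the recurrent
        # state is treated as a continuation of the previous minibatch.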
        for mb in range(numMinibatch):
            X, Y = next(gens['train'])
            #X, Y = get_data(mb, batchSize, data, mapper)
            arguments = ({input: X, label: Y}, mask)
            mask = [False]
            trainer.train_minibatch(arguments)

            if mb % 100 == 0:
                print(generateText(z, mapper, 200) + '\n')

        trainer.summarize_training_progress()
        print(generateText(z, mapper, 100))
Example #17
    bn_update = []

    for f in flows:
        q, log_det_J = f.forward(q, log_det_J)

    base_dist = MultivariateNormalDiag(loc=[0.]*c_dim, scale_diag=[1.]*c_dim)

    prior_logprob = base_dist.log_prob(q) # or C.log(base_dist.pdf(q))
    loss = -C.reduce_mean(prior_logprob + log_det_J)
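    # The loss above is the negative mean log-likelihood under the flow: by the change
    # of variables formula, log p(v) = log p_base(f(v)) + log|det J_f(v)|.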

    v = np.r_[np.random.randn(512 // 2, 2) + np.array([5, 3]),
                    np.random.randn(512 // 2, 2) + np.array([-5, 3])]
    v = (v - v.mean(axis=0)) / v.std(axis=0)

    lr_rate = 5e-3
    learner = C.adam(loss.parameters, C.learning_parameter_schedule_per_sample(lr_rate), C.momentum_schedule_per_sample(0.99))

    # lr_rate = 1e-2
    # learner = C.adam(loss.parameters, C.learning_parameter_schedule(lr_rate), C.momentum_schedule(0.99))

    trainer = C.Trainer(loss, (loss, None), [learner])

    for i in tqdm(range(500)):
        # v = np.random.uniform(size=(1000,c_dim))
        # v = datasets.make_moons(n_samples=1000, noise=.05)[0].astype(np.float32)
        # v = np.r_[np.random.randn(512 // 2, 2) + np.array([5, 3]),
        #              np.random.randn(512 // 2, 2) + np.array([-5, 3])]
        out = trainer.train_minibatch({loss.arguments[0]:v}, outputs=[prior_logprob, log_det_J])

        if i%100 == 0:
            logprob = out[1][prior_logprob].mean() + out[1][log_det_J].mean()