Example #1
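This example is a CNTK test that exercises the HTK feature and label deserializers end to end on a tiny speech setup. It is not self-contained: `data_path` and `abs_path` are directories defined at module scope in the original test file, and the imports below are an assumption based on the names used (the exact module layout shifted between CNTK 2.x releases):

import os

import cntk as C
from cntk.io import (HTKFeatureDeserializer, HTKMLFDeserializer,
                     MinibatchSource, StreamDef, StreamDefs)
from cntk.layers import Dense, For, LSTM, Recurrence, Sequential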
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    # HTK feature deserializer: reads the acoustic features listed in the SCP
    # script file and attaches a symmetric context window of 2 frames per side
    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    # HTK MLF label deserializer: reads frame-level labels from the MLF file,
    # mapping state names to class indices via the state list
    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    # combine both deserializers into a single minibatch source
    reader = MinibatchSource([fd, ld])

    # input variables: the feature vector is the spliced context window,
    # i.e. (2 * context + 1) frames of feature_dim values each
    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    # model: three stacked LSTM recurrences followed by a dense output layer
    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    # Adam learner with a per-sample learning-rate schedule and gradient clipping
    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    # define mapping from reader streams to network inputs
    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #2
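This example is the training loop from a CNTK ATIS-style slot-tagging (language understanding) script, shown in two variants: a basic one, and a second one extended with TensorBoard logging and per-epoch checkpointing. Neither function is self-contained: `input_dim` and `num_labels` are module-level constants, and the code targets the pre-2.0 CNTK API (`cntk.blocks.Input`, `cntk.learner.*`). Roughly the following imports are assumed:

import math
import os

import cntk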
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query = cntk.blocks.Input(input_dim, is_sparse=False)
    slot_labels = cntk.blocks.Input(
        num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, slot_labels)
    pe = cntk.ops.classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100

    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))

    # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]

    # trainer object
    lr_per_sample = cntk.learner.learning_rate_schedule(
        lr_schedule, cntk.learner.UnitType.sample, epoch_size)
    learner = cntk.learner.adam_sgd(z.parameters,
                                    lr=lr_per_sample,
                                    momentum=momentum_time_constant,
                                    low_memory=True,
                                    gradient_clipping_threshold_per_sample=15,
                                    gradient_clipping_with_truncation=True)

    trainer = cntk.Trainer(z, ce, pe, [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    cntk.utils.log_number_of_parameters(z)
    print()
    progress_printer = cntk.ProgressPrinter(
        freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0

    # loop over epochs
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size

        # loop over minibatches on the epoch
        while t < epoch_end:
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(
                min(minibatch_size,
                    epoch_end - t), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            t += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print (name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')

        loss, metric, actual_samples = progress_printer.epoch_summary(
            with_metric=True)

    return loss, metric


# Second variant of train(): same setup, extended with optional TensorBoard
# logging, per-epoch model checkpointing, and manually aggregated loss/error.
def train(reader, model, max_epochs, model_dir=None, tensorboard_logdir=None):
    # Input variables denoting the features and label data
    query = cntk.blocks.Input(input_dim, is_sparse=False)
    slot_labels = cntk.blocks.Input(
        num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, slot_labels)
    pe = cntk.ops.classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100

    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = cntk.learner.momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))

    # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]
    lr_per_sample = cntk.learner.learning_rate_schedule(
        lr_schedule, cntk.learner.UnitType.sample, epoch_size)
    learner = cntk.learner.adam_sgd(z.parameters,
                                    lr=lr_per_sample,
                                    momentum=momentum_time_constant,
                                    low_memory=True,
                                    gradient_clipping_threshold_per_sample=15,
                                    gradient_clipping_with_truncation=True)

    # Progress writers
    progress_writers = [
        cntk.ProgressPrinter(freq=100,
                             first=10,
                             tag='Training',
                             num_epochs=max_epochs)
    ]  # more detailed logging
    #progress_writers = [cntk.ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if tensorboard_logdir is not None:
        progress_writers.append(
            cntk.TensorBoardProgressWriter(freq=10,
                                           log_dir=tensorboard_logdir,
                                           model=z))

    # trainer object
    trainer = cntk.Trainer(z, (ce, pe), [learner], progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    cntk.utils.log_number_of_parameters(z)
    print()

    t = 0
    aggregate_loss = 0
    aggregate_error = 0
    total_samples = 0

    # loop over epochs
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size

        aggregate_loss = 0
        aggregate_error = 0
        total_samples = 0

        # loop over minibatches on the epoch
        while t < epoch_end:
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(
                min(minibatch_size,
                    epoch_end - t), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            samples = trainer.previous_minibatch_sample_count
            t += samples
            total_samples += samples
            aggregate_loss += trainer.previous_minibatch_loss_average * samples
            aggregate_error += trainer.previous_minibatch_evaluation_average * samples

            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print (name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')
        if model_dir:
            z.save(os.path.join(model_dir, "atis" + "_{}.dnn".format(epoch)))
        trainer.summarize_training_progress()

    return aggregate_loss / total_samples, aggregate_error / total_samples
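For orientation, here is a minimal, hypothetical sketch of how these train() functions might be wired up, loosely following the ATIS language-understanding example. Everything in it is an assumption for illustration: the CTF field names (S0/S2), the dimensions, the layer sizes, and the file name all depend on the actual data, and the module paths follow the later CNTK 2.x layout rather than the beta-era API used above.

input_dim = 943      # vocabulary size -- placeholder, must match the data
num_labels = 129     # number of slot labels -- placeholder

def create_reader(path):
    # CTF text-format reader exposing the two streams train() expects
    return cntk.io.MinibatchSource(cntk.io.CTFDeserializer(path, cntk.io.StreamDefs(
        query=cntk.io.StreamDef(field='S0', shape=input_dim, is_sparse=True),
        slot_labels=cntk.io.StreamDef(field='S2', shape=num_labels, is_sparse=True))))

def create_model():
    # embedding -> recurrent LSTM -> dense projection onto the slot labels
    return cntk.layers.Sequential([
        cntk.layers.Embedding(150),
        cntk.layers.Recurrence(cntk.layers.LSTM(300)),
        cntk.layers.Dense(num_labels)])

reader = create_reader("atis.train.ctf")
model = create_model()
train(reader, model, max_epochs=8)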