Example #1
0
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query       = Input(input_dim,  is_sparse=False)  # TODO: make sparse once it works
    slot_labels = Input(num_labels, is_sparse=True)

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error      (z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    time_constant = minibatch_size / math.log(1/0.9)

    lr_per_sample = [0.003]*2+[0.0015]*12+[0.0003]

    # trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, units=epoch_size)
    learner = fsadagrad(z.parameters, lr_schedule, time_constant,
                        targetAdagradAvDenom=1, gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)

    trainer = Trainer(z, ce, pe, [learner])
    #_extend_Trainer(trainer)  # TODO: should be just baked in

    # define mapping from reader streams to network inputs
    input_map = {
        query       : reader.streams.query,
        slot_labels : reader.streams.slot_labels
    }

    # process minibatches and perform model training
    t = 0
    mbs = 0
    for epoch in range(max_epochs):
        loss_numer = 0  # TODO: find a nicer way of tracking, this is clumsy
        loss_denom = 0
        metric_numer = 0
        metric_denom = 0
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:
            # BUGBUG: RuntimeError: GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported
            #data, num_samples = next_minibatch(reader, min(minibatch_size, epoch_size-t), input_map)
            data = reader.next_minibatch(minibatch_size, input_map=input_map)
            if data is None:
                break
            trainer.train_minibatch(data)
            loss_numer += trainer.previous_minibatch_loss_average * trainer.previous_minibatch_sample_count  # too much code for something this simple
            loss_denom +=                                           trainer.previous_minibatch_sample_count
            metric_numer += trainer.previous_minibatch_evaluation_average * trainer.previous_minibatch_sample_count
            metric_denom +=                                                 trainer.previous_minibatch_sample_count
            print_training_progress(trainer, mbs if mbs > 10 else 0, num_mbs_to_show_result)
            t += data[slot_labels].num_samples
            mbs += 1
        print("--- EPOCH {} DONE: loss = {:0.6f} * {}, metric = {:0.1f}% * {} ---".format(epoch+1, loss_numer/loss_denom, loss_denom, metric_numer/metric_denom*100.0, metric_denom))

    return loss_numer/loss_denom, metric_numer/metric_denom
Example #2
0
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    num_output_classes,
                                                    embedding_dim, hidden_dim,
                                                    cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')
    ], 0)

    features_si = mb_source.stream_info(features)
    labels_si = mb_source.stream_info(label)

    # Instantiate the trainer object to drive the model training
    lr = lr = learning_rates_per_sample(0.0005)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            features: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1
Example #3
0
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    rel_path = r"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.003125)
    trainer = Trainer(netout, ce, pe,
                      [sgd_learner(netout.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20
    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            input: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
Example #4
0
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    num_output_classes,
                                                    embedding_dim, hidden_dim,
                                                    cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      sgd(classifier_output.parameters, lr=0.0005))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[
                           Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features : reader.streams.features,
        label    : reader.streams.labels
    }

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      sgd(classifier_output.parameters, lr=0.0005))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def train_sequence_classifier():
    input_dim = 2000;
    cell_dim = 25;
    hidden_dim = 25;
    embedding_dim = 50;
    num_output_classes = 5;

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes = [Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
                    StreamConfiguration( feature_stream_name, input_dim, True, 'x' ),
                    StreamConfiguration( labels_stream_name, num_output_classes, False, 'y')], 0)

    features_si = mb_source.stream_info(features)
    labels_si = mb_source.stream_info(label)

    # Instantiate the trainer object to drive the model training
    lr = lr = learning_rates_per_sample(0.0005)
    trainer = Trainer(classifier_output, ce, pe, [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0;
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if  len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {features : mb[features_si].m_data, label : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1
Example #7
0
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    rel_path = os.path.join(*"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    if not os.path.exists(path):
        readme_file = os.path.normpath(os.path.join(os.path.dirname(path), "..", "README.md"))
        raise RuntimeError("File '%s' does not exist. Please follow the instructions at %s to download and prepare it."%(path, readme_file))
    feature_stream_name = 'features'
    labels_stream_name = 'labels'
    
    mb_source = text_format_minibatch_source(path, [ 
                    StreamConfiguration( feature_stream_name, input_dim ), 
                    StreamConfiguration( labels_stream_name, num_output_classes) ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.003125)
    trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20
    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {input : mb[features_si].m_data, label : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
Example #8
0
def ffnet(debug_output=False):
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.02)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 60

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(0, int(num_minibatches_to_train)):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
        print_training_progress(trainer, i, training_progress_output_freq)

    test_features, test_labels = generate_random_data(minibatch_size,
                                                      input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({
        input: test_features,
        label: test_labels
    })
    return avg_error
Example #9
0
def cifar_resnet():
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
    feats_stream_name = 'features'
    labels_stream_name = 'labels'
    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name,
                                        image_height, image_width,
                                        num_channels, num_classes)
    features_si = minibatch_source.stream_info(feats_stream_name)
    labels_si = minibatch_source.stream_info(labels_stream_name)

    # Input variables denoting the features and label data
    image_input = input_variable((num_channels, image_height, image_width),
                                 features_si.m_element_type)
    label_var = input_variable((num_classes), features_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0078125)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 20
    num_mbs = 1000
    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            image_input: mb[features_si].m_data,
            label_var: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
Example #10
0
def ffnet(debug_output=False):
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(
        input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.02)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (
        num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 60

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/3

    for i in range(0, int(num_minibatches_to_train)):
        features, labels = generate_random_data(
            minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
        print_training_progress(trainer, i, training_progress_output_freq)

    test_features, test_labels = generate_random_data(
        minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch(
        {input: test_features, label: test_labels})
    return avg_error
Example #11
0
def cifar_resnet():
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
    feats_stream_name = 'features'
    labels_stream_name = 'labels'
    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name, 
                        image_height, image_width, num_channels, num_classes)
    features_si = minibatch_source.stream_info(feats_stream_name)
    labels_si = minibatch_source.stream_info(labels_stream_name)

    # Input variables denoting the features and label data
    image_input = input_variable((num_channels, image_height, image_width), features_si.m_element_type)
    label_var = input_variable((num_classes), features_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0078125)
    trainer = Trainer(classifier_output, ce, pe, [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 20
    num_mbs = 1000
    for i in range(0, num_mbs):
        mb=minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {image_input : mb[features_si].m_data, label_var : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
Example #12
0
def sequence_to_sequence_translator(debug_output=False, run_test=False):

    input_vocab_dim = 69
    label_vocab_dim = 69

    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim),
                               dynamic_axes=input_dynamic_axes,
                               name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim),
                                dynamic_axes=label_dynamic_axes,
                                name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_seq_axis, 1,
                           0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start,
                                                      is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH,
         encoder_outputC) = LSTMP_component_with_self_stabilization(
             encoder_outputH.output, hidden_dim, hidden_dim, future_value,
             future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH,
                                                      label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC,
                                                      label_sequence)

    # Decoder
    decoder_history_hook = alias(
        label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label,
                                   label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH,
         encoder_outputC) = LSTMP_component_with_self_stabilization(
             decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH,
             recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share,
                 {decoder_history_hook.output: net_output.output})

    # Instantiate the trainer object to drive the model training
    lr = 0.007
    minibatch_size = 72
    momentum_time_constant = 1100
    m_schedule = momentum_schedule(momentum_time_constant)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr, m_schedule,
                           clipping_threshold_per_sample,
                           gradient_clipping_with_truncation)
    trainer = Trainer(z, ce, errs, learner)

    # setup data
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              rel_path)
    valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              "tiny.ctf")

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    # readers
    randomize_data = True
    if run_test:
        randomize_data = False  # because we want to get an exact error
    train_reader = text_format_minibatch_source(train_path, [
        StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'),
        StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')
    ],
                                                randomize=randomize_data)
    features_si_tr = train_reader.stream_info(feature_stream_name)
    labels_si_tr = train_reader.stream_info(labels_stream_name)

    valid_reader = text_format_minibatch_source(valid_path, [
        StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'),
        StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')
    ],
                                                randomize=False)
    features_si_va = valid_reader.stream_info(feature_stream_name)
    labels_si_va = valid_reader.stream_info(labels_stream_name)

    # get the vocab for printing output sequences in plaintext
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping"
    vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              rel_path)
    vocab = [w.strip() for w in open(vocab_path).readlines()]
    i2w = {i: ch for i, ch in enumerate(vocab)}

    # Get minibatches of sequences to train with and perform model training
    i = 0
    mbs = 0
    epoch_size = 908241
    max_epochs = 10
    training_progress_output_freq = 500

    # make things more basic for running a quicker test
    if run_test:
        epoch_size = 5000
        max_epochs = 1
        training_progress_output_freq = 30

    for epoch in range(max_epochs):
        loss_numer = 0
        metric_numer = 0
        denom = 0

        while i < (epoch + 1) * epoch_size:

            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)

            train_args = {
                'raw_input': mb_train[features_si_tr],
                'raw_labels': mb_train[labels_si_tr]
            }
            trainer.train_minibatch(train_args)

            # collect epoch-wide stats
            samples = trainer.previous_minibatch_sample_count
            loss_numer += trainer.previous_minibatch_loss_average * samples
            metric_numer += trainer.previous_minibatch_evaluation_average * samples
            denom += samples

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % training_progress_output_freq == 0:
                mb_valid = valid_reader.next_minibatch(minibatch_size)
                valid_args = {
                    'raw_input': mb_valid[features_si_va],
                    'raw_labels': mb_valid[labels_si_va]
                }

                e = ng.eval(valid_args)
                print_sequences(e, i2w)

            print_training_progress(trainer, mbs,
                                    training_progress_output_freq)
            i += mb_train[labels_si_tr].num_samples
            mbs += 1

        print("--- EPOCH %d DONE: loss = %f, errs = %f ---" %
              (epoch, loss_numer / denom, 100.0 * (metric_numer / denom)))

    # now setup a test run
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf"
    test_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             rel_path)

    test_reader = text_format_minibatch_source(test_path, [
        StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'),
        StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')
    ],
                                               10000,
                                               randomize=False)
    features_si_te = test_reader.stream_info(feature_stream_name)
    labels_si_te = test_reader.stream_info(labels_stream_name)

    test_minibatch_size = 1024

    # Get minibatches of sequences to test and perform testing
    i = 0
    total_error = 0.0
    while True:
        mb = test_reader.next_minibatch(test_minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        arguments = {
            raw_input: mb[features_si_te],
            raw_labels: mb[labels_si_te]
        }
        mb_error = trainer.test_minibatch(arguments)

        total_error += mb_error

        if debug_output:
            print("Minibatch {}, Error {} ".format(i, mb_error))

        i += 1

    # Average of evaluation errors of all test minibatches
    return total_error / i
def cifar_resnet_distributed(data_path, run_test, num_epochs, communicator=None, save_model_filename=None, load_model_filename=None, debug_output=False):
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10

    feats_stream_name = 'features'
    labels_stream_name = 'labels'

    minibatch_source = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True,
                                     distributed_communicator = communicator)

    features_si = minibatch_source[feats_stream_name]
    labels_si = minibatch_source[labels_stream_name]

    # Instantiate the resnet classification model, or load from file
    
    if load_model_filename:
        print("Loading model:", load_model_filename)
        classifier_output = persist.load_model(load_model_filename)
        image_input = classifier_output.arguments[0]
    else:
        image_input = input_variable(
            (num_channels, image_height, image_width), features_si.m_element_type)
        classifier_output = create_resnet_model(image_input, num_classes)

    # Input variables denoting the features and label data
    label_var = input_variable((num_classes), features_si.m_element_type)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training

    mb_size = 128
    num_mb_per_epoch = 100
    
    num_mbs = num_mb_per_epoch * num_epochs

    lr_per_sample = [1/mb_size]*80+[0.1/mb_size]*40+[0.01/mb_size]
    lr_schedule = learning_rate_schedule(lr_per_sample, units = mb_size * num_mb_per_epoch)
    momentum_time_constant = -mb_size/np.log(0.9)

    # create data parallel distributed trainer if needed
    dist_trainer = distributed.data_parallel_distributed_trainer(communicator, False) if communicator else None

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [momentum_sgd(classifier_output.parameters, lr_schedule, momentum_time_constant, l2_regularization_weight=0.0001)],
                      distributed_trainer = dist_trainer)
    
    # Get minibatches of images to train with and perform model training
    training_progress_output_freq = 100 if communicator else 20

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/4
        
    for i in range(0, num_mbs):
    
        # NOTE: depends on network, the mb_size can be changed dynamically here
        mb = minibatch_source.next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
                image_input: mb[features_si], 
                label_var: mb[labels_si]
                }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        
    if save_model_filename:
        print("Saving model:", save_model_filename)
        persist.save_model(classifier_output, save_model_filename)

    if run_test:
        test_minibatch_source = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
        features_si = test_minibatch_source[feats_stream_name]
        labels_si = test_minibatch_source[labels_stream_name]

        mb_size = 128
        num_mbs = 100

        total_error = 0.0
        for i in range(0, num_mbs):
            mb = test_minibatch_source.next_minibatch(mb_size)

            # Specify the mapping of input variables in the model to actual
            # minibatch data to be trained with
            arguments = {
                    image_input: mb[features_si], 
                    label_var: mb[labels_si]
                    }
            error = trainer.test_minibatch(arguments)
            total_error += error

        return total_error / num_mbs
    else:
        return 0
Example #14
0
def simple_mnist(debug_output=False):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt".
            split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.003125)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20
    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
            input: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        if debug_output:
            print_training_progress(trainer, i, training_progress_output_freq)

    # Load test data
    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *"../../../../Examples/Image/MNIST/Data/Test-28x28_cntk_text.txt".
            split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    test_mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = test_mb_source.stream_info(feature_stream_name)
    labels_si = test_mb_source.stream_info(labels_stream_name)

    # Test data for trained model
    test_minibatch_size = 512
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = test_mb_source.get_next_minibatch(test_minibatch_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        arguments = {
            input: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        eval_error = trainer.test_minibatch(arguments)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #15
0
def sequence_to_sequence_translator(debug_output=False):

    input_vocab_dim = 69
    label_vocab_dim = 69

    hidden_dim = 512
    num_layers = 2

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis("inputAxis")
    label_seq_axis = Axis("labelAxis")

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes)

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes)

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_seq_axis, 1, 0)
    label_sentence_start = sequence.first(raw_labels)

    is_first_label = sequence.is_first(label_sequence)
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output(), hidden_dim, hidden_dim, future_value, future_value
        )

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)

    # Decoder
    decoder_history_from_ground_truth = label_sequence
    decoder_input = element_select(
        is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth)
    )

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if i > 0:
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output(), hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC
        )

    decoder_output = decoder_outputH
    decoder_dim = hidden_dim

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr = 0.007
    momentum_time_constant = 1100
    momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    trainer = Trainer(
        z,
        ce,
        errs,
        [
            momentum_sgd(
                z.parameters(),
                lr,
                momentum_per_sample,
                clipping_threshold_per_sample,
                gradient_clipping_with_truncation,
            )
        ],
    )

    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = "features"
    labels_stream_name = "labels"

    mb_source = text_format_minibatch_source(
        path,
        [
            StreamConfiguration(feature_stream_name, input_vocab_dim, True, "S0"),
            StreamConfiguration(labels_stream_name, label_vocab_dim, True, "S1"),
        ],
        10000,
    )
    features_si = mb_source[feature_stream_name]
    labels_si = mb_source[labels_stream_name]

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 72
    training_progress_output_freq = 30
    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1

    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    test_mb_source = text_format_minibatch_source(
        path,
        [
            StreamConfiguration(feature_stream_name, input_vocab_dim, True, "S0"),
            StreamConfiguration(labels_stream_name, label_vocab_dim, True, "S1"),
        ],
        10000,
        False,
    )
    features_si = test_mb_source[feature_stream_name]
    labels_si = test_mb_source[labels_stream_name]

    # choose this to be big enough for the longest sentence
    train_minibatch_size = 1024

    # Get minibatches of sequences to test and perform testing
    i = 0
    total_error = 0.0
    while True:
        mb = test_mb_source.get_next_minibatch(train_minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        arguments = {raw_input: mb[features_si], raw_labels: mb[labels_si]}
        mb_error = trainer.test_minibatch(arguments)

        total_error += mb_error

        if debug_output:
            print("Minibatch {}, Error {} ".format(i, mb_error))

        i += 1

    # Average of evaluation errors of all test minibatches
    return total_error / i
Example #16
0
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes, dynamic_axes=[
                           Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')], 0)

    features_si = mb_source[features]
    labels_si = mb_source[label]

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd(classifier_output.parameters(), lr=0.0005)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/3

    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)

        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {features: mb[features_si],
                     label: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1

    import copy

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average())
    loss_average = copy.copy(trainer.previous_minibatch_loss_average())

    return evaluation_average, loss_average
Example #17
0
def cifar_resnet(base_path, debug_output=False):
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
    feats_stream_name = 'features'
    labels_stream_name = 'labels'

    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name, 
                        image_height, image_width, num_channels, num_classes, base_path)
    features_si = minibatch_source[feats_stream_name]
    labels_si = minibatch_source[labels_stream_name]

    # Input variables denoting the features and label data
    image_input = input_variable(
        (num_channels, image_height, image_width), features_si.m_element_type)
    label_var = input_variable((num_classes), features_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd(classifier_output.parameters(), lr=0.0078125)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 60
    num_mbs = 1000

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/3

    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
                image_input: mb[features_si], 
                label_var: mb[labels_si]
                }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

    test_minibatch_source = create_test_mb_source(feats_stream_name, labels_stream_name,
                    image_height, image_width, num_channels, num_classes, base_path)
    features_si = test_minibatch_source[feats_stream_name]
    labels_si = test_minibatch_source[labels_stream_name]

    mb_size = 64
    num_mbs = 300

    total_error = 0.0
    for i in range(0, num_mbs):
        mb = test_minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
                image_input: mb[features_si], 
                label_var: mb[labels_si]
                }
        error = trainer.test_minibatch(arguments)
        total_error += error

    return total_error / num_mbs
Example #18
0
def simple_mnist(debug_output=False):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim,
                                            num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *
            "../../../../Examples/Image/DataSets/MNIST/Train-28x28_cntk_text.txt"
            .split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        input: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=0.003125))

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10
    num_minibatches_to_train = (num_samples_per_sweep *
                                num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 500

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 4

    for i in range(0, int(num_minibatches_to_train)):
        mb = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    # Load test data
    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *
            "../../../../Examples/Image/DataSets/MNIST/Test-28x28_cntk_text.txt"
            .split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        input: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size,
                                        input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #19
0
def sequence_to_sequence_translator(debug_output=False):

    input_vocab_dim = 69
    label_vocab_dim = 69

    hidden_dim = 512
    num_layers = 2

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes)

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(
        shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes)

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_seq_axis, 1, 0)
    label_sentence_start = sequence.first(raw_labels)

    is_first_label = sequence.is_first(label_sequence)
    label_sentence_start_scattered = sequence.scatter(
        label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(
        thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(
        thought_vectorC, label_sequence)

    # Decoder
    decoder_history_from_ground_truth = label_sequence
    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(
        decoder_history_from_ground_truth))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH
    decoder_dim = hidden_dim

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr = 0.007
    momentum_time_constant = 1100
    m_schedule = momentum_schedule(momentum_time_constant)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    trainer = Trainer(z, ce, errs, [momentum_sgd(z.parameters, lr, m_schedule, clipping_threshold_per_sample, gradient_clipping_with_truncation)])                   

    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'),
        StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')], 10000)
    features_si = mb_source[feature_stream_name]
    labels_si = mb_source[labels_stream_name]

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 72
    training_progress_output_freq = 30
    if debug_output:
        training_progress_output_freq = training_progress_output_freq/3

    while True:
        mb = mb_source.next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {raw_input: mb[features_si],
                     raw_labels: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1

    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    test_mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_vocab_dim, True, 'S0'),
        StreamConfiguration(labels_stream_name, label_vocab_dim, True, 'S1')], 10000, False)
    features_si = test_mb_source[feature_stream_name]
    labels_si = test_mb_source[labels_stream_name]

    # choose this to be big enough for the longest sentence
    train_minibatch_size = 1024 

    # Get minibatches of sequences to test and perform testing
    i = 0
    total_error = 0.0
    while True:
        mb = test_mb_source.next_minibatch(train_minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        arguments = {raw_input: mb[features_si],
                     raw_labels: mb[labels_si]}
        mb_error = trainer.test_minibatch(arguments)

        total_error += mb_error

        if debug_output:
            print("Minibatch {}, Error {} ".format(i, mb_error))

        i += 1

    # Average of evaluation errors of all test minibatches
    return total_error / i
Example #20
0
def cifar_resnet(base_path, debug_output=False):
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
    feats_stream_name = 'features'
    labels_stream_name = 'labels'

    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name, 
                        image_height, image_width, num_channels, num_classes, base_path)
    features_si = minibatch_source[feats_stream_name]
    labels_si = minibatch_source[labels_stream_name]

    # Input variables denoting the features and label data
    image_input = input_variable(
        (num_channels, image_height, image_width), features_si.m_element_type)
    label_var = input_variable((num_classes), features_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    mb_size = 128
    num_mb_per_epoch = 100
    num_epochs = 10
    num_mbs = num_mb_per_epoch * num_epochs

    lr_per_sample = [1/mb_size]*80+[0.1/mb_size]*40+[0.01/mb_size]
    lr_schedule = learning_rate_schedule(lr_per_sample, units=mb_size * num_mb_per_epoch)
    momentum_per_sample=0.9**(1.0/128)
    
    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [momentum_sgd(classifier_output.parameters, lr_schedule, momentum_per_sample, l2_regularization_weight=0.0001)])

    # Get minibatches of images to train with and perform model training
    training_progress_output_freq = 100

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/3

    for i in range(0, num_mbs):
        mb = minibatch_source.next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
                image_input: mb[features_si], 
                label_var: mb[labels_si]
                }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

    test_minibatch_source = create_test_mb_source(feats_stream_name, labels_stream_name,
                    image_height, image_width, num_channels, num_classes, base_path)
    features_si = test_minibatch_source[feats_stream_name]
    labels_si = test_minibatch_source[labels_stream_name]

    mb_size = 128
    num_mbs = 100

    total_error = 0.0
    for i in range(0, num_mbs):
        mb = test_minibatch_source.next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
                image_input: mb[features_si], 
                label_var: mb[labels_si]
                }
        error = trainer.test_minibatch(arguments)
        total_error += error

    return total_error / num_mbs
Example #21
0
def simple_mnist(debug_output=False):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), input)
    netout = fully_connected_classifier_net(
        scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    try:
        rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(*"../../../../Examples/Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        input  : reader_train.streams.features,
        label  : reader_train.streams.labels
    }

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=0.003125))

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 500



    if debug_output:
        training_progress_output_freq = training_progress_output_freq/4

    for i in range(0, int(num_minibatches_to_train)):
        mb = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    # Load test data
    try:
        rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(*"../../../../Examples/Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        input  : reader_test.streams.features,
        label  : reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #22
0
def simple_mnist(debug_output=False):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(
        scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid
    )

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    try:
        rel_path = os.path.join(
            os.environ["CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY"],
            *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")
        )
    except KeyError:
        rel_path = os.path.join(*"../../../../Examples/Image/Datasets/MNIST/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    feature_stream_name = "features"
    labels_stream_name = "labels"

    mb_source = text_format_minibatch_source(
        path,
        [
            StreamConfiguration(feature_stream_name, input_dim),
            StreamConfiguration(labels_stream_name, num_output_classes),
        ],
    )
    features_si = mb_source[feature_stream_name]
    labels_si = mb_source[labels_stream_name]

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.003125)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 80

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 4

    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {input: mb[features_si], label: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

    # Load test data
    try:
        rel_path = os.path.join(
            os.environ["CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY"], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")
        )
    except KeyError:
        rel_path = os.path.join(*"../../../../Examples/Image/Datasets/MNIST/Test-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    test_mb_source = text_format_minibatch_source(
        path,
        [
            StreamConfiguration(feature_stream_name, input_dim),
            StreamConfiguration(labels_stream_name, num_output_classes),
        ],
        randomize=False,
    )
    features_si = test_mb_source[feature_stream_name]
    labels_si = test_mb_source[labels_stream_name]

    # Test data for trained model
    test_minibatch_size = 512
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = test_mb_source.get_next_minibatch(test_minibatch_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be tested with
        arguments = {input: mb[features_si], label: mb[labels_si]}
        eval_error = trainer.test_minibatch(arguments)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #23
0
def train_sequence_to_sequence_translator():

    input_vocab_dim = 69
    label_vocab_dim = 69

    hidden_dim = 512
    num_layers = 2

    # Source and target inputs to the model
    input_dynamic_axes = [ Axis('inputAxis'), Axis.default_batch_axis() ]
    raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes = input_dynamic_axes)

    label_dynamic_axes = [ Axis('labelAxis'), Axis.default_batch_axis() ]
    raw_labels = input_variable(shape=(label_vocab_dim), dynamic_axes = label_dynamic_axes)

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_dynamic_axes[0], 1, 0)
    label_sentence_start = sequence.first(raw_labels)

    is_first_label = sequence.is_first(label_sequence)
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(encoder_outputH, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)
    
    # Decoder
    decoder_history_from_ground_truth = label_sequence
    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_from_ground_truth))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i == 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(decoder_outputH, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH
    decoder_dim = hidden_dim

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [ 
                    StreamConfiguration( feature_stream_name, input_vocab_dim, True, 'S0' ), 
                    StreamConfiguration( labels_stream_name, label_vocab_dim, True, 'S1') ], 10000)
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.007)
    momentum_time_constant = 1100
    momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    trainer = Trainer(z, ce, errs, [momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample, clipping_threshold_per_sample, gradient_clipping_with_truncation)])                   

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 72
    training_progress_output_freq = 10
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if  len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {raw_input : mb[features_si].m_data, raw_labels : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1
Example #24
0
def sequence_to_sequence_translator(debug_output=False, run_test=False):

    input_vocab_dim = 69
    label_vocab_dim = 69

    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim),
                               dynamic_axes=input_dynamic_axes,
                               name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim),
                                dynamic_axes=label_dynamic_axes,
                                name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_seq_axis, 1,
                           0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start,
                                                      is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH,
         encoder_outputC) = LSTMP_component_with_self_stabilization(
             encoder_outputH.output, hidden_dim, hidden_dim, future_value,
             future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH,
                                                      label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC,
                                                      label_sequence)

    # Decoder
    decoder_history_hook = alias(
        label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label,
                                   label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH,
         encoder_outputC) = LSTMP_component_with_self_stabilization(
             decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH,
             recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share,
                 {decoder_history_hook.output: net_output.output})

    # Instantiate the trainer object to drive the model training
    lr_per_sample = 0.007
    minibatch_size = 72
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(
        z.parameters,
        lr_per_sample,
        momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, ce, errs, learner)

    # setup data
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              rel_path)
    valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              "tiny.ctf")

    # readers
    randomize_data = True
    if run_test:
        randomize_data = False  # because we want to get an exact error

    train_reader = create_reader(train_path, randomize_data, input_vocab_dim,
                                 label_vocab_dim)
    train_bind = {
        raw_input: train_reader.streams.features,
        raw_labels: train_reader.streams.labels
    }

    # get the vocab for printing output sequences in plaintext
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping"
    vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              rel_path)
    vocab = [w.strip() for w in open(vocab_path).readlines()]
    i2w = {i: ch for i, ch in enumerate(vocab)}

    # Get minibatches of sequences to train with and perform model training
    i = 0
    mbs = 0
    epoch_size = 908241
    max_epochs = 10
    training_progress_output_freq = 500

    # make things more basic for running a quicker test
    if run_test:
        epoch_size = 5000
        max_epochs = 1
        training_progress_output_freq = 30

    valid_reader = create_reader(valid_path, False, input_vocab_dim,
                                 label_vocab_dim)
    valid_bind = {
        find_arg_by_name('raw_input', ng): valid_reader.streams.features,
        find_arg_by_name('raw_labels', ng): valid_reader.streams.labels
    }

    for epoch in range(max_epochs):
        loss_numer = 0
        metric_numer = 0
        denom = 0

        while i < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size,
                                                   input_map=train_bind)
            trainer.train_minibatch(mb_train)

            # collect epoch-wide stats
            samples = trainer.previous_minibatch_sample_count
            loss_numer += trainer.previous_minibatch_loss_average * samples
            metric_numer += trainer.previous_minibatch_evaluation_average * samples
            denom += samples

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % training_progress_output_freq == 0:
                mb_valid = valid_reader.next_minibatch(minibatch_size,
                                                       input_map=valid_bind)
                e = ng.eval(mb_valid)
                print_sequences(e, i2w)

            print_training_progress(trainer, mbs,
                                    training_progress_output_freq)
            i += mb_train[raw_labels].num_samples
            mbs += 1

        print("--- EPOCH %d DONE: loss = %f, errs = %f ---" %
              (epoch, loss_numer / denom, 100.0 * (metric_numer / denom)))

    error1 = translator_test_error(z, trainer, input_vocab_dim,
                                   label_vocab_dim)

    save_model(z, "seq2seq.dnn")
    z = load_model("seq2seq.dnn")

    label_seq_axis = Axis('labelAxis')
    label_sequence = slice(find_arg_by_name('raw_labels', z), label_seq_axis,
                           1, 0)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)
    trainer = Trainer(z, ce, errs, [
        momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                     clipping_threshold_per_sample,
                     gradient_clipping_with_truncation)
    ])

    error2 = translator_test_error(z, trainer, input_vocab_dim,
                                   label_vocab_dim)

    assert error1 == error2

    return error1
def cifar_resnet_distributed(data_path, run_test, num_epochs, communicator=None, save_model_filename=None, load_model_filename=None, debug_output=False):
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10

    feats_stream_name = 'features'
    labels_stream_name = 'labels'

    minibatch_source = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True,
                                     distributed_communicator = communicator)

    features_si = minibatch_source[feats_stream_name]
    labels_si = minibatch_source[labels_stream_name]

    # Instantiate the resnet classification model, or load from file
    
    if load_model_filename:
        print("Loading model:", load_model_filename)
        classifier_output = persist.load_model(load_model_filename)
        image_input = classifier_output.arguments[0]
    else:
        image_input = input_variable(
            (num_channels, image_height, image_width), features_si.m_element_type)
        classifier_output = create_resnet_model(image_input, num_classes)

    # Input variables denoting the features and label data
    label_var = input_variable((num_classes), features_si.m_element_type)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training

    mb_size = 128
    num_mb_per_epoch = 100
    
    num_mbs = num_mb_per_epoch * num_epochs

    lr_per_minibatch = learning_rate_schedule([1]*80 + [0.1]*40 + [0.01], mb_size * num_mb_per_epoch, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(-mb_size/np.log(0.9))

    # create data parallel distributed trainer if needed
    dist_trainer = distributed.data_parallel_distributed_trainer(communicator, False) if communicator else None

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [momentum_sgd(classifier_output.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant, l2_regularization_weight=0.0001)],
                      distributed_trainer = dist_trainer)
    
    # Get minibatches of images to train with and perform model training
    training_progress_output_freq = 100 if communicator else 20

    if debug_output:
        training_progress_output_freq = training_progress_output_freq/4
        
    for i in range(0, num_mbs):
    
        # NOTE: depends on network, the mb_size can be changed dynamically here
        mb = minibatch_source.next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {
                image_input: mb[features_si], 
                label_var: mb[labels_si]
                }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        
    if save_model_filename:
        print("Saving model:", save_model_filename)
        persist.save_model(classifier_output, save_model_filename)

    if run_test:
        test_minibatch_source = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
        features_si = test_minibatch_source[feats_stream_name]
        labels_si = test_minibatch_source[labels_stream_name]

        mb_size = 128
        num_mbs = 100

        total_error = 0.0
        for i in range(0, num_mbs):
            mb = test_minibatch_source.next_minibatch(mb_size)

            # Specify the mapping of input variables in the model to actual
            # minibatch data to be trained with
            arguments = {
                    image_input: mb[features_si], 
                    label_var: mb[labels_si]
                    }
            error = trainer.test_minibatch(arguments)
            total_error += error

        return total_error / num_mbs
    else:
        return 0
Example #26
0
def cifar_resnet(base_path, debug_output=False):
    image_height = 32
    image_width = 32
    num_channels = 3
    num_classes = 10
    feats_stream_name = 'features'
    labels_stream_name = 'labels'

    minibatch_source = create_mb_source(feats_stream_name, labels_stream_name,
                                        image_height, image_width,
                                        num_channels, num_classes, base_path)
    features_si = minibatch_source[feats_stream_name]
    labels_si = minibatch_source[labels_stream_name]

    # Input variables denoting the features and label data
    image_input = input_variable((num_channels, image_height, image_width),
                                 features_si.m_element_type)
    label_var = input_variable((num_classes), features_si.m_element_type)

    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd(classifier_output.parameters(), lr=0.0078125)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 60
    num_mbs = 1000

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {image_input: mb[features_si], label_var: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

    test_minibatch_source = create_test_mb_source(feats_stream_name,
                                                  labels_stream_name,
                                                  image_height, image_width,
                                                  num_channels, num_classes,
                                                  base_path)
    features_si = test_minibatch_source[feats_stream_name]
    labels_si = test_minibatch_source[labels_stream_name]

    mb_size = 64
    num_mbs = 300

    total_error = 0.0
    for i in range(0, num_mbs):
        mb = test_minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {image_input: mb[features_si], label_var: mb[labels_si]}
        error = trainer.test_minibatch(arguments)
        total_error += error

    return total_error / num_mbs
Example #27
0
def sequence_to_sequence_translator(debug_output=False, run_test=False):

    input_vocab_dim = 69
    label_vocab_dim = 69

    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(
        shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(
        shape=(label_vocab_dim), dynamic_axes=label_dynamic_axes, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = slice(raw_labels, label_seq_axis, 1, 0) # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)        # <s>

    is_first_label = sequence.is_first(label_sequence)       # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(
        label_sentence_start, is_first_label)

    # Encoder
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)

    thought_vector_broadcastH = sequence.broadcast_as(
        thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(
        thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook') # copy label_sequence

    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(
        decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output : net_output.output})

    # Instantiate the trainer object to drive the model training
    lr = 0.007
    minibatch_size = 72
    momentum_time_constant = 1100
    m_schedule = momentum_schedule(momentum_time_constant)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr, m_schedule, clipping_threshold_per_sample, gradient_clipping_with_truncation)
    trainer = Trainer(z, ce, errs, learner)

    # setup data
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf"
    train_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    valid_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tiny.ctf")

    # readers
    randomize_data = True
    if run_test:
        randomize_data = False # because we want to get an exact error

    train_reader = create_reader(train_path, randomize_data, input_vocab_dim, label_vocab_dim)
    train_bind = {
        raw_input  : train_reader.streams.features,
        raw_labels : train_reader.streams.labels
    }

    # get the vocab for printing output sequences in plaintext
    rel_path = r"../../../../Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping"
    vocab_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    vocab = [w.strip() for w in open(vocab_path).readlines()]
    i2w = { i:ch for i,ch in enumerate(vocab) }

    # Get minibatches of sequences to train with and perform model training
    i = 0
    mbs = 0
    epoch_size = 908241
    max_epochs = 10
    training_progress_output_freq = 500

    # make things more basic for running a quicker test
    if run_test:
        epoch_size = 5000
        max_epochs = 1
        training_progress_output_freq = 30

    valid_reader = create_reader(valid_path, False, input_vocab_dim, label_vocab_dim)
    valid_bind = {
            find_arg_by_name('raw_input',ng)  : valid_reader.streams.features,
            find_arg_by_name('raw_labels',ng) : valid_reader.streams.labels
        }

    for epoch in range(max_epochs):
        loss_numer = 0
        metric_numer = 0
        denom = 0

        while i < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size, input_map=train_bind)
            trainer.train_minibatch(mb_train)

            # collect epoch-wide stats
            samples = trainer.previous_minibatch_sample_count
            loss_numer += trainer.previous_minibatch_loss_average * samples
            metric_numer += trainer.previous_minibatch_evaluation_average * samples
            denom += samples

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % training_progress_output_freq == 0:
                mb_valid = valid_reader.next_minibatch(minibatch_size, input_map=valid_bind)
                e = ng.eval(mb_valid)
                print_sequences(e, i2w)

            print_training_progress(trainer, mbs, training_progress_output_freq)
            i += mb_train[raw_labels].num_samples
            mbs += 1

        print("--- EPOCH %d DONE: loss = %f, errs = %f ---" % (epoch, loss_numer/denom, 100.0*(metric_numer/denom)))


    error1 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim)

    save_model(z, "seq2seq.dnn")
    z = load_model("seq2seq.dnn")

    label_seq_axis = Axis('labelAxis')
    label_sequence = slice(find_arg_by_name('raw_labels',z), label_seq_axis, 1, 0)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)
    trainer = Trainer(z, ce, errs, [momentum_sgd(
                    z.parameters, lr, m_schedule, clipping_threshold_per_sample, gradient_clipping_with_truncation)])

    error2 = translator_test_error(z, trainer, input_vocab_dim, label_vocab_dim)

    assert error1 == error2

    return error1
Example #28
0
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features,
                                                    num_output_classes,
                                                    embedding_dim, hidden_dim,
                                                    cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)
    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')
    ], FULL_DATA_SWEEP)

    features_si = mb_source[features]
    labels_si = mb_source[label]

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd(classifier_output.parameters, lr=0.0005)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    while True:
        mb = mb_source.next_minibatch(minibatch_size)

        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        arguments = {features: mb[features_si], label: mb[labels_si]}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1

    import copy

    evaluation_average = copy.copy(
        trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average