Example #1
def test_focal_loss():
    ce = C.cross_entropy_with_softmax([[1., 2., 3., 4.]],
                                      [[0.35, 0.15, 0.05, 0.45]]).eval()
    fl = Cx.focal_loss_with_softmax([[1., 2., 3., 4.]],
                                    [[0.35, 0.15, 0.05, 0.45]],
                                    alpha=1,
                                    gamma=0).eval()

    np.testing.assert_almost_equal(ce, fl, decimal=6)

    ce = C.cross_entropy_with_softmax([[0, 0, 0.8, 0.2]],
                                      [[0, 0, 1, 0]]).eval()
    fl = Cx.focal_loss_with_softmax([[0, 0, 0.8, 0.2]], [[0, 0, 1, 0]],
                                    gamma=2).eval()

    np.testing.assert_array_less(fl, ce)
    np.testing.assert_almost_equal(fl,
                                   np.array([[0.31306446]], dtype=np.float32),
                                   decimal=6)

    ce = C.cross_entropy_with_softmax([[0, 0, 0.2, 0.8]],
                                      [[0, 0, 1, 0]]).eval()
    fl = Cx.focal_loss_with_softmax([[0, 0, 0.2, 0.8]], [[0, 0, 1, 0]]).eval()

    np.testing.assert_array_less(fl, ce)

    ce = C.cross_entropy_with_softmax([[0, 0, -0.2, 50]],
                                      [[0, 0, 1, 0]]).eval()
    fl = Cx.focal_loss_with_softmax([[0, 0, -0.2, 50]], [[0, 0, 1, 0]]).eval()

    np.testing.assert_equal(ce, fl)
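For reference, the constants asserted above can be reproduced by hand. Assuming the standard focal-loss definition FL = -alpha * (1 - p_t)^gamma * log(p_t), with p_t the softmax probability of the target class, gamma=0 and alpha=1 reduce it to plain cross entropy (the first assertion), and gamma=2 yields the 0.31306 constant; a minimal NumPy sketch, independent of CNTK:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))   # shift for numerical stability
    return e / e.sum()

def focal_loss(logits, one_hot, alpha=1.0, gamma=0.0):
    # assumed definition: -alpha * (1 - p)^gamma * t * log(p), summed over classes
    p = softmax(np.asarray(logits, dtype=np.float64))
    t = np.asarray(one_hot, dtype=np.float64)
    return float(np.sum(-alpha * (1.0 - p) ** gamma * t * np.log(p)))

print(focal_loss([0, 0, 0.8, 0.2], [0, 0, 1, 0]))           # plain cross entropy, ~0.8951
print(focal_loss([0, 0, 0.8, 0.2], [0, 0, 1, 0], gamma=2))  # ~0.31306, the constant asserted above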
Example #2
def test_sequence_unpack_backprop(device_id):
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    label_input = C.input_variable(num_labels, is_sparse=True, name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = C.sequence.last(C.layers.Recurrence(C.plus)(model))
    ce = C.cross_entropy_with_softmax(z, label_input)
    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    label_data = _to_csr([[0, 1], [1, 0]])
    param_grads_1, loss_result_1 = ce.grad({x_seq_input : [_to_csr(seq1_data), _to_csr(seq2_data)], label_input : label_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    z = C.sequence.reduce_sum(model)
    ce = C.cross_entropy_with_softmax(z, label_input)
    param_grads_2, loss_result_2 = ce.grad({x_seq_input : [_to_csr(seq1_data), _to_csr(seq2_data)], label_input : label_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    assert np.allclose(loss_result_1.asarray(), loss_result_2.asarray())

    for param in param_grads_1:
        if not param_grads_1[param].is_sparse:
            reference_grad_value = param_grads_1[param].asarray()
            grad_value = param_grads_2[param].asarray()
            assert np.allclose(reference_grad_value, grad_value)
Example #3
def test_sequence_unpack_backprop(device_id):
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    label_input = C.input_variable(num_labels, is_sparse=True, name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = C.sequence.last(C.layers.Recurrence(C.plus)(model))
    ce = C.cross_entropy_with_softmax(z, label_input)
    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    label_data = _to_csr([[0, 1], [1, 0]])
    param_grads_1, loss_result_1 = ce.grad({x_seq_input : [_to_csr(seq1_data), _to_csr(seq2_data)], label_input : label_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    z = C.sequence.reduce_sum(model)
    ce = C.cross_entropy_with_softmax(z, label_input)
    param_grads_2, loss_result_2 = ce.grad({x_seq_input : [_to_csr(seq1_data), _to_csr(seq2_data)], label_input : label_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    assert np.allclose(loss_result_1.asarray(), loss_result_2.asarray())

    for param in param_grads_1:
        if not param_grads_1[param].is_sparse:
            reference_grad_value = param_grads_1[param].asarray()
            grad_value = param_grads_2[param].asarray()
            assert np.allclose(reference_grad_value, grad_value)
Example #4
def create_criterion(network):
    '''Create the criterion for model'''
    model, label1, label2 = network['model'], network['row_label'], network['col_label']
    label1_ce = C.cross_entropy_with_softmax(model.outputs[0], label1)
    label2_ce = C.cross_entropy_with_softmax(model.outputs[1], label2)
    label1_pe = C.classification_error(model.outputs[0], label1)
    label2_pe = C.classification_error(model.outputs[1], label2)
    label_ce = label1_ce + label2_ce
    label_pe = label1_pe + label2_pe
    return (label_ce, label_pe)
Example #5
def create_criterion(network):
    '''Create the criterion for model'''
    model, label1, label2 = network['model'], network['row_label'], network['col_label']
    label1_ce = C.cross_entropy_with_softmax(model.outputs[0], label1)
    label2_ce = C.cross_entropy_with_softmax(model.outputs[1], label2)
    label1_pe = C.classification_error(model.outputs[0], label1)
    label2_pe = C.classification_error(model.outputs[1], label2)
    label_ce = label1_ce + label2_ce
    label_pe = label1_pe + label2_pe
    return (label_ce, label_pe)
Example #6
def init_model(m):
    progress_writers = [
        cntk.logging.ProgressPrinter(
            freq=int(BATCHSIZE / 2),
            rank=cntk.train.distributed.Communicator.rank(),
            num_epochs=EPOCHS)
    ]

    # Loss (dense labels); check whether sparse labels are supported
    loss = cntk.cross_entropy_with_softmax(m, labels)
    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # unit_gain=True:  momentum_direction = momentum*old_momentum_direction + (1-momentum)*gradient
    # (see the sketch after this example)
    local_learner = cntk.momentum_sgd(
        m.parameters,
        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),
        momentum=cntk.momentum_schedule(MOMENTUM),
        unit_gain=False)

    distributed_learner = cntk.train.distributed.data_parallel_distributed_learner(
        local_learner)

    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)),
                           [distributed_learner], progress_writers)

    return trainer, distributed_learner
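The momentum comment above can be checked with a plain-Python sketch of the two update rules exactly as stated there (scalar gradient assumed; this is an illustration, not CNTK code):

def momentum_step(old_direction, gradient, momentum=0.9, unit_gain=False):
    if unit_gain:
        # unit_gain=True: the gradient contribution is damped by (1 - momentum)
        return momentum * old_direction + (1.0 - momentum) * gradient
    # unit_gain=False: momentum_direction = momentum * old_momentum_direction + gradient
    return momentum * old_direction + gradient

print(momentum_step(1.0, 0.5))                  # 1.4
print(momentum_step(1.0, 0.5, unit_gain=True))  # 0.95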
Example #7
def create_criterion_function(model, y_pre, labels, self_penalty):
    loss = C.cross_entropy_with_softmax(y_pre, labels)
    if self_penalty:
        p_coefficient = 1
        loss += model.p * p_coefficient
    errs = C.classification_error(y_pre, labels)
    return loss, errs  # (model, labels) -> (loss, error metric)
Example #8
 def criterion(input, labels):
     # criterion function must drop the <s> from the labels
     postprocessed_labels = sequence.slice(labels, 1, 0) # <s> A B C </s> --> A B C </s>
     z = model(input, postprocessed_labels)
     ce   = cross_entropy_with_softmax(z, postprocessed_labels)
     errs = classification_error      (z, postprocessed_labels)
     return (ce, errs)
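The only label preprocessing in `criterion` is dropping the sentence-start token, as the inline comment shows. A hypothetical token-level illustration of that transformation in plain Python:

labels = ['<s>', 'A', 'B', 'C', '</s>']
postprocessed_labels = labels[1:]   # the effect the comment describes: <s> A B C </s> --> A B C </s>
print(postprocessed_labels)         # ['A', 'B', 'C', '</s>']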
Example #9
def run_cntk():
    text, chars, char_indices, x_train, y_train = get_data(one_hot_encode_features=False)
    alphabet_size = len(chars)
    print('alphabet_size=', alphabet_size)
    model = build_model_cntk(alphabet_size=alphabet_size)
    model_filename = 'ch8-1_cntk.model'
    model.save(model_filename)
    model = None
    model = cntk.load_model(model_filename)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    y_oneHot = cntk.one_hot(y, num_classes=alphabet_size)
    loss_function = cntk.cross_entropy_with_softmax(model.output, y_oneHot)
    learner = cntk.adam(model.parameters, cntk.learning_parameter_schedule_per_sample(0.001), cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, loss_function), [learner],)

    for epoch in range(1, 60):
        print('epoch', epoch)
        cntk_train(x, y, x_train, y_train, max_epochs=32, batch_size=128, trainer=trainer)
        model_filename = 'final_ch8-1_cntk.model'
        model.save(model_filename)
        generate_text_cntk(char_indices, chars, model, text)
Example #10
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter();
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Example #11
def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True,
                             activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Convolution((3, 3), 128, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Convolution((3, 3), 128, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Convolution((1, 1), num_classes, pad=True)(z)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes, ))

    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
Example #12
def cross_entropy_with_softmax(target_vector, output_vector, name=''):
    '''
    This operation computes the cross entropy over the softmax of the `output_vector`.
    It expects the `output_vector` as unscaled, and it computes softmax over 
    the `output_vector` internally.  Any `output_vector` input over which softmax is 
    already computed before passing to this operator will be incorrect.
    
    :math:`cross\_entropy\_with\_softmax(t, o) = {-{\sum_{i \in \{1,len(t)\}} t_i \log(softmax(o_i)) }}`
    
    Example:
        >>> C.eval(C.cross_entropy_with_softmax([0., 0., 0., 1.], [1., 1., 1., 50.]))
        #[0.]
        
        >>> C.eval(C.cross_entropy_with_softmax([0.35, 0.15, 0.05, 0.45], [1., 2., 3., 4.]))
        #[1.84]
    
    Args:
        target_vector: usually it is one-hot vector where the hot bit corresponds to the label index. 
        But it can be any probability distribution over the labels.
        output_vector: the unscaled computed output values from the network
        name (str): the name of the node in the network            
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import cross_entropy_with_softmax
    target_vector = sanitize_input(target_vector, get_data_type(output_vector))
    output_vector = sanitize_input(output_vector, get_data_type(target_vector))
    return cross_entropy_with_softmax(target_vector, output_vector, name).output()
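The docstring's example values can be reproduced without CNTK. A minimal NumPy sketch of the formula above (softmax of the output, then cross entropy against the target distribution):

import numpy as np

def ce_with_softmax(target, output):
    # -sum_i t_i * log(softmax(o)_i), the quantity described by the formula in the docstring
    o = np.asarray(output, dtype=np.float64)
    t = np.asarray(target, dtype=np.float64)
    p = np.exp(o - o.max())
    p /= p.sum()
    return float(-np.sum(t * np.log(p)))

print(ce_with_softmax([0., 0., 0., 1.], [1., 1., 1., 50.]))         # ~0.0
print(ce_with_softmax([0.35, 0.15, 0.05, 0.45], [1., 2., 3., 4.]))  # ~1.84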
Example #13
def trainDNN(trainX, trainY):
    numOutputClasses = 2

    newCol = np.where(trainY == 0, 1, 0)
    newCol = pd.DataFrame(newCol)
    trainY = trainY.reset_index(drop=True)
    trainY = pd.concat([trainY, newCol], axis=1, ignore_index=True)
    inputDim = trainX.shape[1]
    trainX = np.ascontiguousarray(trainX.as_matrix().astype(np.float32))
    trainY = np.ascontiguousarray(trainY.as_matrix().astype(np.float32))

    input = C.input_variable(inputDim)
    label = C.input_variable(numOutputClasses)

    classifier = create_model(input)
    loss = C.cross_entropy_with_softmax(classifier, label)
    evalError = C.classification_error(classifier, label)

    learning_rate = 0.5
    lrSchedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(classifier.parameters, lrSchedule)
    trainer = C.Trainer(classifier, (loss, evalError), [learner])

    minibatchSize = 25
    numSamples = trainX.shape[0] - (trainX.shape[0] % 25)
    numMinibatchesToTrain = numSamples / minibatchSize

    #train the model
    for i in range(0, int(numMinibatchesToTrain)):
        trainX, trainY, features, labels = getMinibatch(
            trainX, trainY, minibatchSize)
        trainer.train_minibatch({input: features, label: labels})

    return [classifier, trainer, input, label]
Example #14
def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_rate_schedule, sgd, Trainer, \
            training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim, ))
    label = C.input_variable(shape=(num_output_classes, ))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0],
                                           UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {feature: mbs.fsi, label: mbs.lsi}

    session = training_session(trainer=trainer,
                               mb_source=mbs,
                               model_inputs_to_streams=input_map,
                               mb_size=4,
                               max_samples=20)
    session.train()

    assert trainer.total_number_of_samples_seen == 20
Example #15
def create_network(para, verbose=False):
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # In order to accelerate the debugging step, we choose a simple structure with only 2 parameters

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2), )(h)

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C2')(h)

        h = cntk.layers.Dense(para[3])(h)

        h = cntk.layers.Dropout(0.25)(h)

        z = cntk.layers.Dense(10, activation=None, name='R')(h)
    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)
    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])
    if verbose: log = cntk.logging.ProgressPrinter(100)
    for _ in range(20000):
        data = train_reader.next_minibatch(100, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if verbose: log.update_with_trainer(trainer)
    return trainer
Example #16
def train_classifier(autoencoder_definition: Autoencoder):
    # Get the encoded layer, freeze its weights, add the classification and fine tune layers and train again
    encoded_model = autoencoder_definition.encoded_model
    feature_node = find_by_name(encoded_model, 'features')
    cloned_layers = C.combine([encoded_model]).clone(CloneMethod.freeze, {feature_node: features})

    classifier = autoencoder_definition.classifier
    full_model = classifier(cloned_layers)

    # Needs Graphviz. Make sure to add the Graphviz path (anaconda folder/envs/'environment name'/library/bin/graphviz) to the system environment variables
    # plot_path = "full_model.png"
    # plot(full_model, plot_path)

    reader_train = create_reader(train_file, True, input_dim, num_output_classes)

    # Train Classifier
    # Instantiate the loss and error function.
    loss_function = C.cross_entropy_with_softmax(full_model, labels)
    error_function = C.classification_error(full_model, labels)

    input_map={
        labels : reader_train.streams.labels,
        features : reader_train.streams.features
    }

    trainer = train(reader=reader_train, model=full_model, loss_function=loss_function, error_function=error_function, input_map=input_map,
          num_sweeps_to_train_with = 100, num_samples_per_sweep = 2000, minibatch_size = 80, learning_rate = 0.02)

    full_model.save('full_model.model')

    return trainer
Example #17
def create_resnet_network(network_name):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters 
    if network_name == 'resnet20': 
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110': 
        z = create_cifar10_model(input_var, 18, num_classes)
    else: 
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
Example #18
def create_resnet_network(network_name):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': z
    }
Example #19
def seqcla():

    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128
    
    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50    

    t = C.dynamic_axis(name='t')
    features = C.sparse_input(vocab, dynamic_axis=t, name='features')    
    labels = C.input(num_labels, name='labels')
   
    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0, 
                             init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')
    
    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)
    
    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.times(w, L) + b
    z.name='z'
    z.tag = "output"
    
    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')    
    
    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"
    
    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10, learning_rates_per_mb=0.1, max_epochs=3)    
    
    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd, input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))        
        
        # write out the predictions
        ctx.write(input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))
                  
        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)
        
        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
Example #20
 def criterion(input:InputSequence[C.layers.Tensor[input_vocab_dim]]
         ,labels:LabelSequence[C.layers.Tensor[label_vocab_dim]]):
     postprocessed_labels = C.sequence.slice(labels, 1, 0) # <s> A B C </s> --> A B C </s>
     z = model(input, postprocessed_labels)
     ce = C.cross_entropy_with_softmax(z, postprocessed_labels)
     errs = C.classification_error(z, postprocessed_labels)
     return (ce, errs)
Example #21
    def createDecoderNetwork(self, networkHiddenSrc, srcLength, trgLength):
        timeZeroHidden = C.slice(networkHiddenSrc, 0, 0, 1)
        srcSentEmb = C.slice(timeZeroHidden, -1, Config.SrcHiddenSize,
                             Config.SrcHiddenSize * 2)
        networkHiddenTrg = {}
        inputTrg = C.reshape(self.inputMatrixTrg,
                             shape=(Config.TrgMaxLength, Config.BatchSize,
                                    Config.TrgVocabSize))
        attProbAll = []
        tce = 0
        for i in range(0, trgLength, 1):

            preTrgEmb = self.initTrgEmb if i == 0 else self.EmbTrg(inputTrg[i -
                                                                            1])

            if (i == 0):
                networkHiddenTrg[i] = self.createDecoderInitNetwork(srcSentEmb)
            else:
                (networkHiddenTrg[i], attProb) = self.createDecoderRNNNetwork(
                    networkHiddenSrc, preTrgEmb, networkHiddenTrg[i - 1],
                    srcLength)
                attProbAll = attProb if i == 1 else C.splice(
                    attProbAll, attProb, axis=0)

            preSoftmax = self.createReadOutNetwork(networkHiddenTrg[i],
                                                   preTrgEmb)
            ce = C.cross_entropy_with_softmax(preSoftmax, inputTrg[i], 2)
            ce = C.reshape(ce, shape=(1, Config.BatchSize))
            tce += C.times_transpose(ce, self.maskMatrixTrg[i])

        return tce
Example #22
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter();
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                  learning_rate_schedule(lr_values, UnitType.sample, 1),
                  C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})
    
    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
Example #23
def train_lm(testing=False):
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes:
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used as the training criterion
    # error: this is a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z) ; print()
    
    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule_per_sample(learning_rate)
    momentum_schedule = C.momentum_schedule_per_sample(momentum_per_sample)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                            gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                            gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence : features, label_sequence : labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end =  timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples

            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
Example #24
def create_network():
    # Create the input and target variables
    input_var = cntk.input_variable(
        (sequence_length, frame_height, frame_width), name='input_var')
    target_var = cntk.input_variable((num_classes, ),
                                     is_sparse=True,
                                     name='target_var')

    input_head = cntk.slice(input_var, axis=0, begin_index=0, end_index=19)
    input_tail = cntk.slice(input_var, axis=0, begin_index=1, end_index=20)
    diff = input_tail - input_head

    model = Sequential([
        resnet_model(cntk.placeholder()),
        Label('resnet'),
        Dense(num_classes, name='output')
    ])(diff)

    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': cntk.cross_entropy_with_softmax(model, target_var),
        'metric': cntk.classification_error(model, target_var)
    }
Example #25
def seqcla():

    # LSTM params
    input_dim = 50
    output_dim = 128
    cell_dim = 128
    
    # model
    num_labels = 5
    vocab = 2000
    embed_dim = 50    

    t = C.dynamic_axis(name='t')
    # temporarily using cntk1 SparseInput because cntk2's Input() will simply allow sparse as a parameter
    features = cntk1.SparseInput(vocab, dynamicAxis=t, name='features')    
    labels = C.input(num_labels, name='labels')
   
    train_reader = C.CNTKTextFormatReader(train_file)

    # setup embedding matrix
    embedding = C.parameter((embed_dim, vocab), learning_rate_multiplier=0.0, 
                             init_from_file_path=embedding_file)

    # get the vector representing the word
    sequence = C.times(embedding, features, name='sequence')
    
    # add an LSTM layer
    L = lstm_layer(output_dim, cell_dim, sequence, input_dim)
    
    # add a softmax layer on top
    w = C.parameter((num_labels, output_dim), name='w')
    b = C.parameter((num_labels), name='b')
    z = C.plus(C.times(w, L), b, name='z')
    z.tag = "output"
    
    # and reconcile the shared dynamic axis
    pred = C.reconcile_dynamic_axis(z, labels, name='pred')    
    
    ce = C.cross_entropy_with_softmax(labels, pred)
    ce.tag = "criterion"
    
    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=10, learning_rates_per_mb=0.1, max_epochs=3)    
    
    with C.LocalExecutionContext('seqcla') as ctx:
        # train the model
        ctx.train(root_nodes=[ce], training_params=my_sgd, input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))        
        
        # write out the predictions
        ctx.write(input_map=train_reader.map(
                  features, alias='x', dim=vocab, format='Sparse').map(
                  labels, alias='y', dim=num_labels, format='Dense'))
                  
        # do some manual accuracy testing
        acc = calc_accuracy(train_file, ctx.output_filename_base)
        
        # and test for the same number...
        TOLERANCE_ABSOLUTE = 1E-02
        assert np.allclose(acc, 0.6006415396952687, atol=TOLERANCE_ABSOLUTE)
Example #26
def cross_entropy_with_softmax(output_vector, target_vector, name=''):
    '''
    This operation computes the cross entropy over the softmax of the `output_vector`.
    It expects the `output_vector` as unscaled, and it computes softmax over 
    the `output_vector` internally.  Any `output_vector` input over which softmax is 
    already computed before passing to this operator will be incorrect.
    
    :math:`cross\_entropy\_with\_softmax(o, t) = {-{\sum_{i \in \{1,len(t)\}} t_i \log(softmax(o_i)) }}`
    
    Example:
        >>> C.eval(C.cross_entropy_with_softmax([1., 1., 1., 50.], [0., 0., 0., 1.]))
        #[0.]
        
        >>> C.eval(C.cross_entropy_with_softmax([1., 2., 3., 4.], [0.35, 0.15, 0.05, 0.45]))
        #[1.84]
    
    Args:
        output_vector: the unscaled computed output values from the network
        target_vector: usually it is one-hot vector where the hot bit corresponds to the label index. 
        But it can be any probability distribution over the labels.
        name (str): the name of the node in the network            
    Returns:
        :class:`cntk.Function`
    '''
    from cntk import cross_entropy_with_softmax
    output_vector = sanitize_input(output_vector, get_data_type(target_vector))
    target_vector = sanitize_input(target_vector, get_data_type(output_vector))
    return cross_entropy_with_softmax(output_vector, target_vector, name).output()
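To illustrate the docstring's warning about passing an already-softmaxed output, a small NumPy sketch (not CNTK code) comparing the loss on raw logits with the value obtained when softmax has mistakenly been applied beforehand:

import numpy as np

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()

logits = np.array([1., 2., 3., 4.])
target = np.array([0.35, 0.15, 0.05, 0.45])

correct = -np.sum(target * np.log(softmax(logits)))           # ~1.84, as in the docstring
double  = -np.sum(target * np.log(softmax(softmax(logits))))  # softmax applied twice: a different, incorrect value
print(correct, double)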
Example #27
def criterion(data, label_one_hot):
    z = model(
        data
    )  # apply model. Computes a non-normalized log probability for every output class.
    loss = C.cross_entropy_with_softmax(
        z, label_one_hot)  # this applies softmax to z under the hood
    metric = C.classification_error(z, label_one_hot)
    return loss, metric
Example #28
 def criterion(input: InputSequence[C.layers.Tensor[input_vocab_dim]],
               labels: LabelSequence[C.layers.Tensor[label_vocab_dim]]):
     postprocessed_labels = C.sequence.slice(
         labels, 1, 0)  # <s> A B C </s> --> A B C </s>
     z = model(input, postprocessed_labels)
     ce = C.cross_entropy_with_softmax(z, postprocessed_labels)
     errs = C.classification_error(z, postprocessed_labels)
     return (ce, errs)
Example #29
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #30
def create_resnet_network(network_name, fp16):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2],
                                            num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2],
                                            num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 5, 2],
                                                 num_classes, stride1x1,
                                                 stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 22, 2],
                                                 num_classes, stride1x1,
                                                 stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(graph_input, [2, 7, 35, 2],
                                                 num_classes, stride1x1,
                                                 stride3x3)
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'errs': errs,
        'top5Errs': top5Errs,
        'output': z
    }
Example #31
def create_network(feature_dim = 40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                       feature_norm_files = None, label_prior_file = None, context=(0,0), model_type=None):

    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        m = C.reshape(load_ascii_vector(feature_mean_file,'feature_mean'), shape=(1, feature_dim))
        s = C.reshape(load_ascii_vector(feature_inv_stddev_file,'feature_invstddev'), shape=(1,feature_dim))
        def _func(operand):
            return C.reshape(C.element_times(C.reshape(operand,shape=(1+context[0]+context[1], feature_dim)) - m, s), shape=operand.shape)
        return _func


    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda: C.layers.Dense(hidden_size, activation=C.sigmoid))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
        def _func(operand):
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size, num_layers=num_layers, bidirectional=True, recurrent_op='lstm' )
        return _func

    # Input variables denoting the features and label data
    feature_var = C.sequence.input_variable(feature_dim * (1+context[0]+context[1]))
    label_var = C.sequence.input_variable(num_classes)

    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)

    if (model_type=="DNN"):
        net = MyDNNLayer(512,4)(feature_norm)
    elif (model_type=="BLSTM"):
        net = MyBLSTMLayer(512,2)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)
    pe = C.classification_error(out, label_var)
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net # adding last hidden layer output for future use in CTC tutorial
    }
Example #32
def train(nonlinearity,
          num_hidden_layers,
          device_id,
          minibatch_size=10,
          num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes,
                                       hidden_layers_dim, num_hidden_layers,
                                       nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({
            inp: features,
            label: labels
        },
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(
            trainer, i, training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
Example #33
    def create_model(self):
        hidden_layers = [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 32]
        first_input = C.ops.splice(self._input, self._target)
        first_input_size = first_input.shape
        first_input = C.ops.reshape(
            first_input, (first_input_size[0], 1, first_input_size[1]))

        model = C.layers.Convolution2D((1, 3),
                                       num_filters=8,
                                       pad=True,
                                       reduction_rank=1,
                                       activation=C.ops.tanh)(first_input)
        print(model)
        for h in hidden_layers:
            input_new = C.ops.splice(model, first_input, axis=0)
            model = C.layers.Convolution2D((1, 3),
                                           num_filters=h,
                                           pad=True,
                                           reduction_rank=1,
                                           activation=C.ops.tanh)(input_new)
            print(model)
        ######
        #model = C.ops.splice(model, self._target)
        # Dense layers
        direction = C.layers.Sequential([
            C.layers.Dense(720, activation=C.ops.relu),
            C.layers.Dense(360, activation=None)
        ])(model)

        velocity = C.layers.Sequential([
            C.layers.Dense(128, activation=C.ops.relu),
            C.layers.Dense(64, activation=None),
            C.layers.Dense(1, activation=None)
        ])(model)

        model = C.ops.splice(direction, velocity)
        if self._load_model:
            model = C.load_model(self._file_name)
            direction = model[0:360]
            velocity = model[360]

        C.logging.log_number_of_parameters(model)
        print(model)
        #loss = C.squared_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)
        #error = C.squared_error(direction, self._output)  + C.squared_error(velocity, self._output_velocity)
        loss = C.cross_entropy_with_softmax(
            direction, self._output) + C.squared_error(velocity,
                                                       self._output_velocity)
        error = C.classification_error(direction,
                                       self._output) + C.squared_error(
                                           velocity, self._output_velocity)

        learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
        progress_printer = C.logging.ProgressPrinter(tag='Training')
        trainer = C.Trainer(model, (loss, error), learner, progress_printer)
        return model, loss, learner, trainer
Example #34
def fineTuneModel(folder_with_data,path_to_label_csv="label.csv",
    original_model_path="../vgg13.model",max_epochs=10):

    trainingValues = getData(folder_with_data,path_to_label_csv)

    input_var =ct.input((1,height,width),np.float32)
    label_var = ct.input((num_classes), np.float32)
    print("cloning old model")
    z = clone_model(original_model_path,input_var)
    loss = ct.cross_entropy_with_softmax(z, label_var)
    metric = ct.classification_error(z, label_var) 

    minibatch_size = 32
    epoch_size = trainingValues.getLengthOfData()

    lr_per_minibatch = [learning_rate]*10+[learning_rate/2.0]
    mm_time_constant = -minibatch_size/np.log(0.9)
    lr_schedule = ct.learning_rate_schedule(lr_per_minibatch,
        unit=ct.UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = ct.momentum_as_time_constant_schedule(mm_time_constant)

    learner = ct.momentum_sgd(z.parameters, lr_schedule, mm_schedule)
    trainer = ct.Trainer(z, (loss, metric), learner)
    print("created trainer and learner")

    print("training started")
    while epoch < max_epochs :

        trainingValues.reset() 
        # Training 
        start_time = time.time()
        training_loss = 0
        training_accuracy = 0

        #mini-batch learning
        while trainingValues.hasMoreMinibatches():
            #while there is data for a mini batch:
            x,y,currBatchSize = trainingValues.getNextMinibatch(minibatch_size)
            # x - images y - labels/emotions
            trainer.train_minibatch({ input_var : x, label_var: y})

            #maintain stats:
            training_loss += trainer.previous_minibatch_loss_average * currBatchSize
            training_accuracy += trainer.previous_minibatch_evaluation_average * currBatchSize
            
        training_accuracy /= trainingValues.getLengthOfData()
        training_accuracy = 1.0 - training_accuracy

        print("Epoch took:", time.time() - start_time, "seconds")
        print("training accuracy:\t\t{:.2f}%".format(training_accuracy*100))

        epoch +=1

    #SAVE MODEL
    z.save("../vgg13.model")
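The `mm_time_constant` line above converts the per-minibatch momentum 0.9 into CNTK's time-constant form. Assuming the relation that line inverts, momentum = exp(-minibatch_size / time_constant), the round trip can be checked directly:

import numpy as np

minibatch_size = 32
momentum_per_minibatch = 0.9

# invert momentum = exp(-minibatch_size / time_constant)
mm_time_constant = -minibatch_size / np.log(momentum_per_minibatch)
print(mm_time_constant)                            # ~303.7
print(np.exp(-minibatch_size / mm_time_constant))  # recovers 0.9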
Example #35
def ffnet(learner, trainer=None):
    inputs = 5
    outputs = 3
    layers = 2
    hidden_dimension = 3

    if trainer is None:
        # input variables denoting the features and label data
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        # Instantiate the feedforward classification model
        my_model = Sequential([
            Dense(hidden_dimension,
                  activation=C.sigmoid,
                  init=C.glorot_uniform(seed=98052)),
            Dense(outputs, init=C.glorot_uniform(seed=98052))
        ])
        z = my_model(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        # Instantiate the trainer object to drive the model training
        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)],
                            [progress_printer])
    else:
        features = trainer.loss_function.arguments[0]
        label = trainer.loss_function.arguments[1]

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
    avg_error = trainer.test_minibatch({
        features: test_features,
        label: test_labels
    })
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
Example #36
 def __getTrainer(self, _learning_rate=0.03):
     loss = cntk.cross_entropy_with_softmax(self.__neural_network,
                                            self.__output_shape)
     errs = cntk.classification_error(self.__neural_network,
                                      self.__output_shape)
     return cntk.Trainer(self.__neural_network, (loss, errs), [
         cntk.sgd(
             self.__neural_network.parameters,
             cntk.learning_rate_schedule(_learning_rate,
                                         cntk.UnitType.minibatch))
     ])
Example #37
def train_eval_mnist_onelayer_from_file(criterion_name=None, eval_name=None):

    # Network definition
    feat_dim = 784
    label_dim = 10
    hidden_dim = 200
    
    cur_dir = os.path.dirname(__file__)

    training_filename = os.path.join(cur_dir, "Data", "Train-28x28_text.txt")
    test_filename = os.path.join(cur_dir, "Data", "Test-28x28_text.txt")

    features = C.input(feat_dim)
    features.name = 'features'

    feat_scale = C.constant(0.00390625)
    feats_scaled = C.element_times(features, feat_scale)

    labels = C.input(label_dim)
    labels.tag = 'label'
    labels.name = 'labels'

    traning_reader = C.CNTKTextFormatReader(training_filename)
    test_reader = C.CNTKTextFormatReader(test_filename)

    h1 = add_dnn_sigmoid_layer(feat_dim, hidden_dim, feats_scaled, 1)
    out = add_dnn_layer(hidden_dim, label_dim, h1, 1)
    out.tag = 'output'

    ec = C.cross_entropy_with_softmax(labels, out)
    ec.name = criterion_name
    ec.tag = 'criterion'
    
    eval = C.ops.square_error(labels, out)
    eval.name = eval_name
    eval.tag = 'eval'
    
    # Specify the training parameters (settings are scaled down)
    my_sgd = C.SGDParams(epoch_size=600, minibatch_size=32,
                       learning_rates_per_mb=0.1, max_epochs=5, momentum_per_mb=0)

    # Create a context or re-use if already there
    with C.LocalExecutionContext('mnist_one_layer', clean_up=True) as ctx:
        # CNTK actions
         ctx.train(
            root_nodes=[ec, eval],
            training_params=my_sgd,
            input_map=traning_reader.map(labels, alias='labels', dim=label_dim).map(features, alias='features', dim=feat_dim))
            
         result = ctx.test(
            root_nodes=[ec, eval],
            input_map=test_reader.map(labels, alias='labels', dim=label_dim).map(features, alias='features', dim=feat_dim))

         return result
Example #38
def test_focal_loss_image():
    output = C.input_variable((3, 1, 2))
    target = C.input_variable((3, 1, 2))

    o = np.random.random((3, 1, 2)).astype(np.float32)
    t = np.array([[[0, 1]], [[0, 0]], [[1, 0]]], dtype=np.float32)

    ce = C.cross_entropy_with_softmax(output, target, axis=0).eval({output: o, target: t})
    fl = Cx.focal_loss_with_softmax(output, target, alpha=1, gamma=0, axis=0).eval({output: o, target: t})

    np.testing.assert_almost_equal(ce, fl, decimal=5)
Example #39
def finalize_network(reader, model_details, max_amount_of_epochs,
                     samples_per_epoch, samples_per_minibatch,
                     pixel_dimensions, classes, learning_rate):
    features = input_variable(shape=(pixel_dimensions['depth'],
                                     pixel_dimensions['height'],
                                     pixel_dimensions['width']))
    label = input_variable(shape=len(classes))

    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)

    model = create_tf_model(model_details,
                            num_classes=len(classes),
                            input_features=normalized_features,
                            freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)
    learner = momentum_sgd(parameters=model.parameters,
                           lr=learning_rate_schedule(learning_rate,
                                                     UnitType.minibatch),
                           momentum=0.9,
                           l2_regularization_weight=0.0005)

    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)

    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])

    log_number_of_parameters(model)

    map_input_to_streams_train = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    training_session(trainer=trainer,
                     mb_source=reader,
                     model_inputs_to_streams=map_input_to_streams_train,
                     mb_size=samples_per_minibatch,
                     progress_frequency=samples_per_epoch,
                     checkpoint_config=CheckpointConfig(
                         frequency=samples_per_epoch,
                         filename=os.path.join("./checkpoints",
                                               "ConvNet_Lego_VisiOn"),
                         restore=True)).train()
    network = {'features': features, 'label': label, 'model': softmax(model)}
    model_name = f"CNN-3200-224-resnet-18.model"
    export_path = os.path.abspath(
        os.path.join("..", "..", "Final models", "CNN", model_name))
    model.save(export_path)
    return network
Example #40
def test_debug_multi_output():
    input_dim = 2
    num_output_classes = 2

    f_input = input_variable(input_dim, np.float32,
                             needs_gradient=True, name='features')

    p = parameter(shape=(input_dim,), init=10, name='p')

    comb = combine([f_input, p])

    ins = InStream(['n', 'n', 'n', 'n', 'n'])
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input_variable(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error,
           loss.find_by_name('features'),
           loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward  ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # assert outs.written == out_stuff

    assert len(outs.written) == 8

    v_p = "Parameter('p', "
    v_i = "Input('features'"
    v_t = 'Times: '

    assert outs.written[0].startswith('=') and 'forward' in outs.written[0]
    line_1, line_2, line_3 = outs.written[1:4]

    assert outs.written[4].startswith('=') and 'backward' in outs.written[4]
    line_5, line_6, line_7 = outs.written[5:8]
    assert line_5.startswith(v_t)
    assert line_6.startswith(v_p) and line_7.startswith(v_i) or \
           line_6.startswith(v_i) and line_7.startswith(v_p)
Example #41
def test_factor_dense_for_prediction():

    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    input = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training

    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])


    # Run the trainer and perform model training
    training_progress_output_freq = 20
    plotdata = {"batchsize":[], "loss":[], "error":[]}


    for i in range(0, int(num_minibatches_to_train)):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim, num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data for training
        trainer.train_minibatch({input : features, label : labels})
    
    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size, filter_function=_filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)

    original_labels_probs = original_out.eval({input : features})
    predicted_label_probs = factored_out.eval({input : features})

    original_prediction_percentage = _percentage_match(labels, original_labels_probs)

    # The reduced model should match the original on at least 50% of the samples.
    # For this test we reduced the number of training minibatches, so the match is lower.
    assert(original_prediction_percentage * 0.5 <= _percentage_match(labels, predicted_label_probs))
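_percentage_match (like _filter and _get_rank_reduced_size) is a helper defined elsewhere in this test module. Purely as a hypothetical sketch of what such a matching helper might look like, not the actual implementation, it would compare arg-max predictions against the one-hot labels:

import numpy as np

def _percentage_match_sketch(labels, predicted_probs):
    # labels: one-hot array of shape (N, num_classes)
    # predicted_probs: softmax outputs of shape (N, num_classes)
    true_classes = np.argmax(labels, axis=-1)
    predicted_classes = np.argmax(np.squeeze(predicted_probs), axis=-1)
    return 100.0 * np.mean(true_classes == predicted_classes)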
def create_resnet_network(network_name, fp16):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2], num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2], num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(graph_input, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'errs' : errs,
        'top5Errs' : top5Errs,
        'output': z
    }
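The dictionary returned by create_resnet_network is normally handed to a separate training routine. A minimal sketch of that wiring is shown below; the momentum-SGD hyper-parameters are placeholders, not values taken from the original script:

import cntk as C

def create_trainer_sketch(network):
    lr_schedule = C.learning_rate_schedule(0.1, C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    learner = C.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                             l2_regularization_weight=0.0001)
    # 'ce' and 'errs' were cast back to float32 above, so they serve as the
    # training criterion unchanged even in the fp16 case
    return C.Trainer(network['output'], (network['ce'], network['errs']), [learner])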
Exemple #43
0
def test_to_sequence_backprop(device_id):
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    label_seq_input = C.sequence.input_variable(num_labels, is_sparse=True, name='labels')
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    param_grads_1, loss_result_1 = ce.grad({x_seq_input : [_to_csr(seq1_data), _to_csr(seq2_data)], label_seq_input : label_seq_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    # Create a clone of the model that uses a non-sequence input
    # and converts it to a sequence using to_sequence
    x_non_seq_input = C.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='non_seq_features')
    x_seq_lens = C.input_variable((), name='sequence_lengths')
    x_seq = C.to_sequence(x_non_seq_input, x_seq_lens)
    x_seq = C.reconcile_dynamic_axes(C.times(x_seq, np.eye(input_vocab_size, dtype=np.float32)), label_seq_input)
    ce_clone = ce.clone('share', {x_seq_input : x_seq})

    x_non_seq_data = C.NDArrayView.from_csr(_to_csr([seq1_data, seq2_data + [[0, 0, 0]]]), shape=(2, 3, 3))
    x_seq_lens_data = np.asarray([3, 2], dtype=np.float32)

    x_non_seq_input = next(argument for argument in ce_clone.arguments if argument.name == 'non_seq_features')
    label_seq_input = next(argument for argument in ce_clone.arguments if argument.name == 'labels')
    x_seq_lens = next(argument for argument in ce_clone.arguments if argument.name == 'sequence_lengths')
    param_grads_2, loss_result_2 = ce_clone.grad({x_non_seq_input : x_non_seq_data, x_seq_lens : x_seq_lens_data, label_seq_input : label_seq_data},
                                                 wrt=ce_clone.parameters, outputs=[ce_clone], as_numpy=False)


    assert np.array_equal(loss_result_1.as_sequences()[0], loss_result_2.as_sequences()[0])
    assert np.array_equal(loss_result_1.as_sequences()[1], loss_result_2.as_sequences()[1])

    for param in param_grads_1:
        if not param_grads_1[param].is_sparse:
            reference_grad_value = param_grads_1[param].asarray()
            grad_value = param_grads_2[param].asarray()
            assert np.array_equal(reference_grad_value, grad_value)
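_to_csr is a small helper from the test module that turns nested Python lists into SciPy CSR matrices, which CNTK accepts for sparse inputs. A hypothetical sketch of such a helper (the real definition may differ in detail):

import numpy as np
import scipy.sparse as sp

def _to_csr_sketch(data):
    dense = np.asarray(data, dtype=np.float32)
    # flatten any leading batch/sequence dimensions so each row is one sample
    return sp.csr_matrix(dense.reshape(-1, dense.shape[-1]))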
Exemple #44
0
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim, context=(context,context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    features = C.input_variable(((2*context+1)*feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                    lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                    momentum=C.momentum_as_time_constant_schedule(1000),
                    low_memory=True,
                    gradient_clipping_threshold_per_sample=15, gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map={ features: reader.streams.amazing_features, labels: reader.streams.awesome_labels }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
def train(nonlinearity, num_hidden_layers, device_id,
          minibatch_size=10, num_samples=1000):
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq)

        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
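print_training_progress is a small reporting helper shared by the CNTK tutorials. A hypothetical sketch of what it might do (the exact formatting in the real helper may differ), consistent with the "NA" check in the loop above:

def print_training_progress_sketch(trainer, mb, frequency, verbose=True):
    training_loss, eval_error = "NA", "NA"
    if mb % frequency == 0:
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if verbose:
            print("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(
                mb, training_loss, eval_error * 100))
    return mb, training_loss, eval_error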
Exemple #46
0
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
            training_session, times

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    #having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed
    #note that training session can end earlier if there is no updates
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
Exemple #47
0
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
            features: reader.streams.features,
            label:    reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
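create_reader is defined alongside this example. As a hypothetical sketch (the CTF field names 'x' and 'y' are assumptions), it builds a MinibatchSource over the CTF file exposing the 'features' and 'labels' streams used above:

import cntk as C

def create_reader_sketch(path, is_training, input_dim, label_dim):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='x', shape=input_dim, is_sparse=True),
            labels=C.io.StreamDef(field='y', shape=label_dim, is_sparse=False))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)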
    def create_model(self, frame_mode=False):
        if frame_mode:
            self.feat = cntk.input_variable(shape=(feat_dim,))
            self.label = cntk.input_variable((label_dim,))

            net = cntk.layers.Sequential([cntk.layers.Dense(cell_dim), cntk.layers.Dense(label_dim)])
            self.output = net(self.feat)
        else:
            # sequence mode
            self.feat = cntk.sequence.input_variable(shape=(feat_dim,))
            self.label = cntk.sequence.input_variable((label_dim,))

            net = cntk.layers.Sequential([cntk.layers.Recurrence(cntk.layers.LSTM(shape=label_dim, cell_shape=(cell_dim,)))])
            self.output = net(self.feat)

        self.ce = cntk.cross_entropy_with_softmax(self.output, self.label)
        self.err = cntk.classification_error(self.output, self.label)
Exemple #49
0
def ffnet(learner, trainer=None):
    inputs = 5
    outputs = 3
    layers = 2
    hidden_dimension = 3

    if trainer is None:
        # input variables denoting the features and label data
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        # Instantiate the feedforward classification model
        my_model = Sequential ([
                        Dense(hidden_dimension, activation=C.sigmoid, init=C.glorot_uniform(seed=98052)),
                        Dense(outputs, init=C.glorot_uniform(seed=98052))])
        z = my_model(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        # Instantiate the trainer object to drive the model training
        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)], [progress_printer])
    else:
        features = trainer.loss_function.arguments[0]
        label = trainer.loss_function.arguments[1]

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
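ffnet expects a learner factory when no trainer is passed in, and reuses the supplied trainer (and therefore the same model parameters) otherwise. A hypothetical usage sketch with momentum SGD; the hyper-parameter values are placeholders:

lr = C.learning_rate_schedule(0.02, C.UnitType.minibatch)
mm = C.momentum_schedule(0.9)
first_avg_error, first_test_error, trainer = ffnet(lambda params: C.momentum_sgd(params, lr, mm))
# continue training the same model by passing the trainer back in
second_avg_error, second_test_error, _ = ffnet(None, trainer=trainer)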
Exemple #50
0
def train_eval_logistic_regression_from_file(criterion_name=None,
        eval_name=None, device_id=-1):
    cur_dir = os.path.dirname(__file__)

    # Using data from https://github.com/Microsoft/CNTK/wiki/Tutorial
    train_file = os.path.join(cur_dir, "Train-3Classes.txt")
    test_file = os.path.join(cur_dir, "Test-3Classes.txt")

    X = C.input(2)
    y = C.input(3)
    
    W = C.parameter(value=np.zeros(shape=(3, 2)))
    b = C.parameter(value=np.zeros(shape=(3, 1)))

    out = C.times(W, X) + b
    out.tag = 'output'
    ce = C.cross_entropy_with_softmax(y, out)
    ce.name = criterion_name
    ce.tag = 'criterion'
    eval = C.ops.square_error(y, out)
    eval.tag = 'eval'
    eval.name = eval_name

    # training data readers
    train_reader = C.CNTKTextFormatReader(train_file, randomize=None)

    # testing data readers
    test_reader = C.CNTKTextFormatReader(test_file, randomize=None)

    my_sgd = C.SGDParams(
        epoch_size=0, minibatch_size=25, learning_rates_per_mb=0.1, max_epochs=3)

    with C.LocalExecutionContext('logreg') as ctx:
        ctx.device_id = device_id

        ctx.train(
            root_nodes=[ce, eval], 
            training_params=my_sgd,
            input_map=train_reader.map(X, alias='I', dim=2).map(y, alias='L', dim=3))

        result = ctx.test(
                root_nodes=[ce, eval], 
                input_map=test_reader.map(X, alias='I', dim=2).map(y, alias='L', dim=3))

        return result
Exemple #51
0
def test_lstm_over_lstm_thought_vectors_2(device_id):
    dev = cntk_device(device_id)
    input_vocab_size=3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    utterances_input = C.sequence.input_variable((input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable((), name='conversation_sequence_lengths')
    label_input = C.sequence.input_variable(num_labels, is_sparse=True, sequence_axis=C.Axis('label_sequence'), name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        model = C.user_function(UtteranceBatchReshape(model, conversation_lengths_input))
        model = C.to_sequence_like(model, label_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_input)

    sentinel_utt_data = C.NDArrayView.from_csr(_to_csr([[0, 0, 1]]), device=C.cpu())
    c1_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0], [1, 0, 0]]), device=C.cpu())
    c1_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1]]), device=C.cpu())
    c1_utt3_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1], [0, 1, 0]]), device=C.cpu())
    c2_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 1]]), device=C.cpu())
    c3_utt1_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0], [0, 1, 1], [1, 0, 0]]), device=C.cpu())
    c3_utt2_data = C.NDArrayView.from_csr(_to_csr([[0, 1, 0]]), device=C.cpu())

    all_utt_data = C.Value.create(C.sequence.input_variable((input_vocab_size), is_sparse=True), [c1_utt1_data, c1_utt2_data, c1_utt3_data, c2_utt1_data, sentinel_utt_data, sentinel_utt_data, c3_utt1_data, c3_utt2_data, sentinel_utt_data], device=C.cpu()).data
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0]]
    seq3_label_data = [[1, 0], [0, 1]]
    label_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data), _to_csr(seq3_label_data)]
    param_grads, loss_result = ce.grad({utterances_input : all_utt_data, label_input : label_data, conversation_lengths_input : conversation_lengths_data},
                                       wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.01
    assert np.allclose(loss_result[0], [[0.678914], [0.668076], [0.728129]], atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.679029]], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], [[0.705393], [0.674243]], atol=absolute_tolerance)
Exemple #52
0
def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input_variable(((2*context+1)*feature_dim))
    labels = sequence.input_variable((num_classes))

    # create network
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce' : ce,
        'errs' : errs,
        'output': z
    }
Exemple #54
0
def ffnet(optimizer,  num_minibatches_to_train, learning_rate_func, lr_args, learner_kwargs):
    inputs = 2
    outputs = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid,
              init=C.glorot_uniform(seed=SEED)),
        Dense(outputs, init=C.glorot_uniform(seed=SEED))])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr= learning_rate_func(0.125, *lr_args)
    progress_printer = ProgressPrinter(0)
    learner = optimizer(z.parameters, lr) if optimizer != sgd else sgd(z.parameters, lr, **learner_kwargs)

    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(
            minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(
        minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch(
        {features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential ([
                    Dense(hidden_dimension, activation=C.sigmoid),
                    Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = C.learning_parameter_schedule(0.125)
    progress_printer = ProgressPrinter(0)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)], [progress_printer])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
Exemple #56
0
def train_eval_logistic_regression_with_numpy(criterion_name=None,
        eval_name=None, device_id=-1):

    # for repro and tests :-)
    np.random.seed(1)

    train_X, train_y = synthetic_data(train_N, feature_dim, num_classes)
    test_X, test_y = synthetic_data(test_N, feature_dim, num_classes)

    # Set up the training data for CNTK. Before writing the CNTK configuration,
    # the data will be attached to X.reader.batch and y.reader.batch and then
    # serialized. 
    X = C.input_numpy(train_X)
    y = C.input_numpy(train_y)

    # define our network -- one weight tensor and a bias
    W = C.parameter(value=np.zeros(shape=(num_classes, feature_dim)))
    b = C.parameter(value=np.zeros(shape=(num_classes, 1)))
    out = C.times(W, X) + b

    ce = C.cross_entropy_with_softmax(y, out)
    ce.tag = 'criterion'
    ce.name = criterion_name    
    
    eval = C.ops.cntk1.SquareError(y, out)
    eval.tag = 'eval'
    eval.name = eval_name

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=25,
            learning_rates_per_mb=0.1, max_epochs=3)

    with C.LocalExecutionContext('logreg', clean_up=True) as ctx:
        ctx.device_id = device_id

        ctx.train(
                root_nodes=[ce,eval], 
                training_params=my_sgd)

        # For testing, we attach the test data to the input nodes.
        X.reader.batch, y.reader.batch = test_X, test_y
        result = ctx.test(root_nodes=[ce,eval])
        return result
Exemple #57
0
def test_udf_checkpointing(tmpdir):
    dev, w_value, c1_value, c2_value, op = build_test_function()

    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))

    loss = C.cross_entropy_with_softmax(op, label)
    eval_error = C.classification_error(op, label)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    learner = C.sgd(op.parameters, lr_schedule)
    trainer = C.Trainer(op, (loss, eval_error), [learner])

    trainer.train_minibatch({op.arguments[0]: np.random.random((2, 2)).astype(np.float32)}, device=dev)

    filepath = str(tmpdir / 'test_checkpointing.out')

    trainer.save_checkpoint(filepath, external_state={'test': 'test'})

    d = C.cntk_py.Dictionary.load(filepath)
    assert len(d.keys()) != 0
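The checkpoint written above can later be restored on the same trainer with restore_from_checkpoint. A minimal sketch of resuming from it, assuming filepath still exists and the user-defined function inside op can be deserialized:

# restore the model and learner state saved above
trainer.restore_from_checkpoint(filepath)
# training can then continue from the restored state
trainer.train_minibatch({op.arguments[0]: np.random.random((2, 2)).astype(np.float32)}, device=dev)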
Exemple #58
0
def train_eval_logistic_regression_with_numpy(criterion_name=None,
        eval_name=None, device_id=-1):

    # for repro and tests :-)
    np.random.seed(1)

    N = 500
    d = 250

    # create synthetic data using numpy
    X = np.random.randn(N, d)
    Y = np.random.randint(size=(N, 1), low=0, high=2)
    Y = np.hstack((Y, 1-Y))

    # set up the training data for CNTK
    x = C.input_numpy(X)
    y = C.input_numpy(Y)

    # define our network -- one weight tensor and a bias
    W = C.parameter(value=np.zeros(shape=(2, d)))
    b = C.parameter(value=np.zeros(shape=(2, 1)))
    out = C.times(W, x) + b

    ce = C.cross_entropy_with_softmax(y, out)
    ce.tag = 'criterion'
    ce.name = criterion_name    
    
    eval = C.ops.cntk1.SquareError(y, out)
    eval.tag = 'eval'
    eval.name = eval_name

    my_sgd = C.SGDParams(epoch_size=0, minibatch_size=25, learning_rates_per_mb=0.1, max_epochs=3)
    with C.LocalExecutionContext('logreg') as ctx:
        ctx.device_id = device_id

        ctx.train(
                root_nodes=[ce,eval], 
                training_params=my_sgd)

        result = ctx.test(root_nodes=[ce,eval])
        return result
Exemple #59
0
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential ([
                    Dense(hidden_dimension, activation=C.sigmoid),
                    Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    pp = ProgressPrinter(0)
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        pp.update_with_trainer(trainer)

    last_avg_error = pp.avg_loss_since_start()

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error