Example #1
def create_model(X):
    # define a few stacked GRU layers followed by a dense output

    l1 = Recurrence(step_function=GRU(shape=20))(X)
    l2 = Recurrence(step_function=GRU(shape=20))(l1)
    l3 = Dense(shape=1)(l2)

    return l3
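A minimal usage sketch (assuming CNTK 2.x is installed; the input dimension of 10 is illustrative, not from the original example):

import cntk as C
from cntk.layers import Recurrence, GRU, Dense

features = C.sequence.input_variable(10)  # sequences of 10-dim feature vectors
z = create_model(features)                # one scalar output per sequence step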
Example #2
    def model(self, x):
        param1 = 500
        param2 = 250
        x = Dense(param1, activation=cntk.tanh)(x)
        x = Dense(param1, activation=cntk.tanh)(x)
        x = Dense(param1, activation=cntk.tanh)(x)
        # two stacked bidirectional LSTM layers: the forward and backward
        # recurrences run in parallel and their outputs are spliced together
        x = Sequential([(Recurrence(LSTM(param2)), Recurrence(LSTM(param2), go_backwards=True)), cntk.splice])(x)
        x = Sequential([(Recurrence(LSTM(param2)), Recurrence(LSTM(param2), go_backwards=True)), cntk.splice])(x)
        x = Dense(self.dim_y)(x)
        return x
Example #3
def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim,
                                LSTM_dim, cell_dim):
    lstm_classifier = Sequential([Embedding(embedding_dim),
                                  Recurrence(LSTM(LSTM_dim, cell_dim)),
                                  sequence.last,
                                  Dense(num_output_classes)])
    return lstm_classifier(input)
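A hedged usage sketch (the vocabulary size and other dimensions below are hypothetical, not from the original example):

import cntk as C

x = C.sequence.input_variable(1000)  # e.g. one-hot encoded word ids
z = LSTM_sequence_classifier_net(x, num_output_classes=5,
                                 embedding_dim=50, LSTM_dim=25, cell_dim=25)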
Example #4
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.layers.Embedding(hidden_dim),
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights),
                                     shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(latent_vector,
                                                      label_sequence,
                                                      vocab_dim, hidden_dim)

    return z, ce, errs
Example #5
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    # a stack of 3 LSTM recurrences followed by a dense classification layer
    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
Example #6
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels

    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(
        encoder.classes_))  # number of classes = 4
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(
        C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)

    fb = C.forward_backward(labels_graph,
                            prediction_tensor,
                            blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must be the same length as the sequence length in network_out

    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})

    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb, ), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor:
            [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })

        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})
    assert encoder.network_output_to_labels(result[0],
                                            squash_repeat=True) == ground_truth
Example #7
def create_model(output_dim):

    return Sequential([
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
        Dense(output_dim)
    ])
Example #8
def create_model():
    '''
    Creates the model to train
    :return: Returns the last output of a sequential model using LSTMs
    '''
    return Sequential([
        For(range(NUMBER_LAYERS),
            lambda: Sequential([Recurrence(LSTM(HIDDEN_LAYER_DIMENSIONS))])),
        sequence.last,
        Dense(NUM_OUTPUT_CLASSES)
    ])
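A sketch of wiring this model up, with hypothetical values for the module-level constants it assumes:

import cntk as C

NUMBER_LAYERS = 2
HIDDEN_LAYER_DIMENSIONS = 128
NUM_OUTPUT_CLASSES = 5

x = C.sequence.input_variable(20)  # illustrative feature dimension
z = create_model()(x)              # sequence.last yields one output per sequence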
Example #9
def test_recurrence():
    inputAxis = Axis('inputAxis')
    stateAxis = Axis('stateAxis')
    InputSequence = SequenceOver[inputAxis]
    StateSequence = SequenceOver[stateAxis]

    # input and expected for both tests below
    x = np.reshape(np.arange(0, 25, dtype=np.float32), (1, 5, 5))
    exp = [[0.239151, 0.239151, 0.239151, 0.239151, 0.239151],
           [0.338713, 0.338713, 0.338713, 0.338713, 0.338713],
           [0.367456, 0.367456, 0.367456, 0.367456, 0.367456],
           [0.375577, 0.375577, 0.375577, 0.375577, 0.375577],
           [0.377891, 0.377891, 0.377891, 0.377891, 0.377891]]

    ####################################################
    # Test 1: Recurrence(): initial state is constant
    ####################################################
    # Note: We cannot use random init of the GRU parameters because random numbers will
    # depend on what previous tests were run. Hence, use a constant (which is not realistic).
    # TODO: Find out how to reset the random generator, then remove the constant init.
    R = Recurrence(GRU(5, init=0.05), go_backwards=False, initial_state=0.1)

    @Function
    @Signature(InputSequence[Tensor[5]])
    def F(x):
        return R(x)

    rt = F(x)
    np.testing.assert_array_almost_equal(
        rt[0], exp, decimal=6, err_msg='Error in Recurrence(GRU()) forward')

    ####################################################
    # Test 2: RecurrenceFrom(): initial state is data input
    ####################################################
    RF = RecurrenceFrom(GRU(5, init=0.05), go_backwards=False)

    @Function
    @Signature(s=StateSequence[Tensor[5]], x=InputSequence[Tensor[5]])
    def FF(s, x):
        return RF(s, x)

    # pass the same 0.1 value as the constant initial state in the previous
    # test so the result matches
    s = np.ones((1, 5, 5)) * 0.1
    rt = FF(s, x)
    np.testing.assert_array_almost_equal(
        rt[0],
        exp,
        decimal=6,
        err_msg='Error in RecurrenceFrom(GRU()) forward')
Example #10
def test_large_model_serialization_float(tmpdir):
    import os
    from cntk.layers import Recurrence, LSTM, Dense

    type_size = np.dtype(np.float32).itemsize
    two_gb = 2**31
    size = (2097152 + 4, 256, 512, 4096)
    assert size[0] * size[1] * type_size > two_gb

    device = C.device.cpu()
    i = C.sequence.input(size[0])
    w = C.Parameter((size[0], size[1]),
                    init=C.uniform(3.0, seed=12345),
                    device=device)
    e = C.times(i, w)

    h_fwd = Recurrence(LSTM(size[2]))(e)
    h_bwd = Recurrence(LSTM(size[2]), go_backwards=True)(e)
    h_last_fwd = C.sequence.last(h_fwd)
    h_first_bwd = C.sequence.first(h_bwd)
    t = C.splice(h_last_fwd, h_first_bwd)

    z1 = Dense(size[2], activation=C.relu)(t)
    z = Dense(2, activation=None)(z1)

    filename = str(tmpdir / 'test_large_model_serialization_float.out')
    z.save(filename)

    assert os.path.getsize(filename) > two_gb

    y = C.Function.load(filename, device=device)

    assert (len(z.parameters) == len(y.parameters))

    for param_pair in zip(z.parameters, y.parameters):
        assert param_pair[0].shape == param_pair[1].shape
        assert np.allclose(param_pair[0].value, param_pair[1].value)
Example #11
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in, ))
        self.label = C.sequence.input_variable(shape=(self.n_out, ))

        self.three_dnn = Sequential([
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh)
        ])
        self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.final_dnn = Dense(self.n_out)

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        # note: the evaluation metric reuses the same loss function
        self.eval_err = loss_fun(self.output, self.label)

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters,
                                      lr=self.lr_s,
                                      momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err),
                                 [self.learner])
Example #12
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input(((2*context+1)*feature_dim))
    labels = sequence.input((num_classes))

    # create network
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce' : ce,
        'errs' : errs,
        'output': z
    }
Example #13
def test_recurrent_block(block_type, block_outputs_count, block_size, W_mult, H_mult, expected_res):
    input_shape = 4

    sequenceAxis = Axis('sequenceAxis')

    y = input(input_shape, dynamic_axes=[Axis.default_batch_axis(), sequenceAxis])
    data = np.reshape(np.arange(0,16, dtype=np.float32), (1,4,4))

    rnn_block = block_type(block_size, init=0.1)

    assert len(rnn_block.outputs) == block_outputs_count
    rnn_net = Recurrence(rnn_block)(y)

    assert rnn_net.b.shape == (W_mult*block_size,)
    assert rnn_net.W.shape == (input_shape, W_mult*block_size)
    assert rnn_net.H.shape == (block_size, H_mult*block_size)

    res = rnn_net.eval(data)
    expected = np.asarray(expected_res, dtype=np.float32)

    np.testing.assert_array_almost_equal(res[0], expected, decimal=6)
Example #14
def test_recurrence_step_fun():
    import cntk as C

    def step_f(prev1, x):
        return prev1 * x

    rec = Recurrence(step_f)

    def step_f(prev1, prev2, x):
        return prev1 * prev2 * x, prev1 * x

    rec = Recurrence(step_f)

    def step_f(prev1, prev2, prev3, x):
        return prev1 * prev2 * prev3 * x, prev1 * x, prev2 * x

    rec = Recurrence(step_f)

    with pytest.raises(ValueError):

        def step_f(prev1, prev2, prev3, prev4, x):
            return prev1 * prev2 * prev3 * x, prev1 * x, prev2 * x, prev4 * x

        rec = Recurrence(step_f)

    with pytest.raises(TypeError):
        v = C.input_variable((1), name='additional_input_variable')
        step_f = lambda prev, x: prev * v * x
        rec = Recurrence(step_f)

    with pytest.raises(TypeError):

        def step_f(prev1, x):
            p = C.Parameter((1))
            return prev1 * x * p

        rec = Recurrence(step_f)
Example #15
def bidirectional_recurrence(fwd, bwd):
    f = Recurrence(fwd)
    g = Recurrence(bwd, go_backwards=True)
    x = cntk.placeholder()
    return cntk.splice(f(x), g(x))
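A brief usage sketch (hypothetical dimensions; assumes cntk and cntk.layers are imported): splicing the forward and backward outputs doubles the per-step feature dimension.

import cntk
from cntk.layers import LSTM

x = cntk.sequence.input_variable(30)
birnn = bidirectional_recurrence(LSTM(100), LSTM(100))
h = birnn(x)  # 100 forward + 100 backward = 200 features per step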
Example #16
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]

    return [X], [Y]


get_sample(0)

input_sequence = sequence.input_variable(shape=vocab_size)
label_sequence = sequence.input_variable(shape=vocab_size)

model = Sequential([
    For(
        range(2), lambda: Sequential(
            [Stabilizer(),
             Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])

z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(
    z.parameters, lr_per_sample, momentum_time_constant,
    gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
    gradient_clipping_with_truncation=gradient_clipping_with_truncation)
Example #17
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]

    return [X], [Y]
sample(0)


input_seq_axis = Axis('inputAxis')
input_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis)
label_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis)

# model = Sequential([Dense(300),Dense(vocab_size)])

model = Sequential([
        For(range(2), lambda:
                   Sequential([Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])),
        Dense(vocab_size)])

z = model(input_sequence)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                    gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                    gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
Example #18
def array(vals):
    return np.array(vals, dtype=np.float32)


if __name__ == '__main__':

    # TODO: add all Layers tests here and use the correct pytest pattern

    # ----------------------------------------------
    # Recurrence() over regular function
    # ----------------------------------------------

    from cntk.layers import Recurrence
    from cntk.ops import plus
    from cntk.debugging import *
    r = Recurrence(plus)
    dump_function(r)
    r.update_signature(1)
    dump_function(r)
    data = [  # simple sequence
        array([[2], [6], [4], [8], [6]])
    ]
    #out = r(data)
    # BUGBUG: fails with "ValueError: Variable(Plus5_output) with unknown shape detected when compiling the Function graph!"
    #print(out)

    # ----------------------------------------------
    # sequential convolution without reduction dimension
    # ----------------------------------------------

    from cntk.layers import Convolution
Example #19
        out = net(w_a)[0]
        if (out[1]>0.5 and l==-1): correct+=1
        if (out[0]>0.5 and l==1): correct+=1
        total+=1
    print("{} out of {} correct ({}%)".format(correct,total,correct/total*100))

z_sm = C.softmax(z)
check(z_sm)

# Now implement simple RNN
words_arr1 = [to_onehot(list(map(char_to_num,list(w)))) for w in words]

input_var = sequence.input_variable(vocab_size)
label_var = C.input_variable(2)

model = Sequential([
    Recurrence(C.layers.RNNStep(200, activation=C.relu)),
    sequence.last,
    Dense(100, activation=C.relu),
    Dense(2)
])

z = model(input_var)
z_sm = C.softmax(z)

ce = cross_entropy_with_softmax(z, label_var)
errs = classification_error(z, label_var)

lr_per_minibatch = learning_rate_schedule(0.02, UnitType.minibatch)
learner = C.learners.sgd(z.parameters, lr_per_minibatch)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)

minibatch_size = 10