Example #1
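# Minimal imports to make this test runnable; BiRecurrence and IndyLSTM are
# assumed to come from the cntkx extension package (module path assumed):
import numpy as np
import cntk as C
from cntk.layers import LSTM
from cntkx.layers import BiRecurrence, IndyLSTM
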
def test_birecurrence():
    dim = 10
    hidden_dim = 30

    a = C.sequence.input_variable(dim)
    b = BiRecurrence(LSTM(hidden_dim), weight_tie=False)(a)

    assert b.shape == (hidden_dim * 2, )

    c = BiRecurrence(LSTM(hidden_dim), weight_tie=True)(a)

    assert c.shape == b.shape
    assert len(b.parameters) == 3 + 3
    assert len(c.parameters) == 3 + 4
    assert c.f_token0.shape == c.b_token0.shape == (hidden_dim, )
    assert c.f_token1.shape == c.b_token1.shape == (hidden_dim, )

    d = BiRecurrence(IndyLSTM(hidden_dim), weight_tie=True)(a)

    assert d.shape == b.shape
    assert len(b.parameters) == 3 + 3
    assert len(d.parameters) == 3 + 4
    assert d.f_token0.shape == d.b_token0.shape == (hidden_dim, )
    assert d.f_token1.shape == d.b_token1.shape == (hidden_dim, )

    n = [
        np.random.random((5, 10)).astype(np.float32),
        np.random.random((7, 10)).astype(np.float32),
    ]

    c.eval({a: n})
    b.eval({a: n})
Example #2
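# Method excerpt from a model class; assumes module-level imports such as:
# import cntk
# from cntk.layers import Dense, Sequential, Recurrence, LSTM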
    def model(self, x):
        param1 = 500
        param2 = 250
        x = Dense(param1, activation=cntk.tanh)(x)
        x = Dense(param1, activation=cntk.tanh)(x)
        x = Dense(param1, activation=cntk.tanh)(x)
        x = Sequential([(Recurrence(LSTM(param2)),
                         Recurrence(LSTM(param2), go_backwards=True)),
                        cntk.splice])(x)
        x = Sequential([(Recurrence(LSTM(param2)),
                         Recurrence(LSTM(param2), go_backwards=True)),
                        cntk.splice])(x)
        x = Dense(self.dim_y)(x)
        return x
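# Note: inside Sequential, a tuple of layers applies both branches in parallel
# to the same input; cntk.splice then concatenates the forward and backward
# LSTM outputs, yielding one bidirectional recurrent layer.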
Example #3
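# Minimal imports; PyramidalBiRecurrence is assumed to come from the cntkx
# extension package (module path assumed):
import numpy as np
import cntk as C
from cntk.layers import LSTM
from cntkx.layers import PyramidalBiRecurrence
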
def test_pyramidal_bi_recurrence():
    dim = 10
    width = 2
    hidden_dim = 30
    seq_length = 16
    a = C.sequence.input_variable(dim)
    b = PyramidalBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), width)(a)

    assert b.shape == (hidden_dim * 2 * width, )

    n = np.random.random((1, seq_length, dim)).astype(np.float32)
    result = b.eval({a: n})[0]

    assert result.shape == (seq_length // width, hidden_dim * 2 * width)
Example #4
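# Snippet from a language-model training script; assumes module-level imports
# and configuration defined elsewhere (num_layers, use_sampled_softmax, alpha,
# softmax_sample_size, token_frequencies_file_path, load_sampling_weights, and
# the two cross-entropy helper functions):
import numpy as np
import cntk as C
from cntk.layers import Sequential, For, Stabilizer, Recurrence, LSTM
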
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.layers.Embedding(hidden_dim),
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights),
                                     shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(latent_vector,
                                                      label_sequence,
                                                      vocab_dim, hidden_dim)

    return z, ce, errs
Example #5
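# Minimal imports for this snippet:
from cntk import sequence
from cntk.layers import Sequential, Embedding, Recurrence, LSTM, Dense
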
def LSTM_sequence_classifier_net(input, num_output_classes, embedding_dim,
                                 LSTM_dim, cell_dim):
    lstm_classifier = Sequential([Embedding(embedding_dim),
                                  Recurrence(LSTM(LSTM_dim, cell_dim)),
                                  sequence.last,
                                  Dense(num_output_classes)])
    return lstm_classifier(input)
Example #6
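# Minimal imports; CTCEncoder is assumed to come from the cntkx extension
# package (module path assumed):
import numpy as np
import cntk as C
from cntk.layers import Dense, Recurrence, LSTM
from cntkx.misc import CTCEncoder
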
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels

    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(encoder.classes_))  # 'a', 'b', 'c' + blank = 4 classes
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)

    fb = C.forward_backward(labels_graph,
                            prediction_tensor,
                            blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must match the sequence length of the network input

    # Example per-frame activations over the 4 classes (not used in the training loop below).
    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})

    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb, ), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor:
            [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })

        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})
    assert encoder.network_output_to_labels(result[0],
                                            squash_repeat=True) == ground_truth
Example #7
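# Snippet written against an older CNTK API (adam_sgd, learning_rate_schedule);
# data_path and abs_path point at the HTK test data and are defined elsewhere.
# Assumes module-level imports such as:
import os
import cntk as C
from cntk.io import (MinibatchSource, StreamDef, StreamDefs,
                     HTKFeatureDeserializer, HTKMLFDeserializer)
from cntk.layers import Sequential, For, Recurrence, LSTM, Dense
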
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))),
         Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample,
                                                     epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    progress_printer = C.ProgressPrinter(freq=0)
    trainer = C.Trainer(z, (ce, errs), learner, progress_printer)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
    assert True
    os.chdir(abs_path)
Example #8
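# Assumes these module-level imports; num_layers and hidden_dim are
# configuration values defined elsewhere:
from cntk.layers import Sequential, For, Stabilizer, Recurrence, LSTM, Dense
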
def create_model(output_dim):

    return Sequential([
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
        Dense(output_dim)
    ])
Example #9
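# Assumes these module-level imports; the upper-case constants are defined elsewhere:
from cntk import sequence
from cntk.layers import Sequential, For, Recurrence, LSTM, Dense
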
def create_model():
    '''
    Creates the model to train.
    :return: the last output of a sequential LSTM model
    '''
    return Sequential([
        For(range(NUMBER_LAYERS),
            lambda: Sequential([Recurrence(LSTM(HIDDEN_LAYER_DIMENSIONS))])),
        sequence.last,
        Dense(NUM_OUTPUT_CLASSES)
    ])
Example #10
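# Assumes these module-level imports; resnet_model and bidirectional_recurrence
# are project-specific helpers defined elsewhere:
import cntk
from cntk.layers import Sequential, Dense, Label, BatchNormalization, LSTM
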
def create_network():
    input_var = cntk.sequence.input_variable((num_channels, frame_height, frame_width),
                                             name='input_var')
    target_var = cntk.input_variable((num_classes,), is_sparse=True, name='target_var')

    with cntk.layers.default_options(enable_self_stabilization=True):
        model = Sequential([
            resnet_model(cntk.placeholder()), Label('resnet'),
            Dense(hidden_dim, name='cnn_fc'),
            cntk.layers.Stabilizer(),
            bidirectional_recurrence(LSTM(hidden_dim // 2), LSTM(hidden_dim // 2)),
            cntk.sequence.last,
            BatchNormalization(),
            Dense(num_classes)
        ])(input_var)

    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': cntk.cross_entropy_with_softmax(model, target_var),
        'metric': cntk.classification_error(model, target_var)
    }
Example #11
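# Minimal module-level imports for this test (tmpdir is the pytest fixture):
import numpy as np
import cntk as C
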
def test_large_model_serialization_float(tmpdir):
    import os
    from cntk.layers import Recurrence, LSTM, Dense

    type_size = np.dtype(np.float32).itemsize
    two_gb = 2**31
    size = (2097152 + 4, 256, 512, 4096)
    assert size[0] * size[1] * type_size > two_gb

    device = C.device.cpu()
    i = C.sequence.input_variable(size[0])
    w = C.Parameter((size[0], size[1]),
                    init=C.uniform(3.0, seed=12345),
                    device=device)
    e = C.times(i, w)

    h_fwd = Recurrence(LSTM(size[2]))(e)
    h_bwd = Recurrence(LSTM(size[2]), go_backwards=True)(e)
    h_last_fwd = C.sequence.last(h_fwd)
    h_first_bwd = C.sequence.first(h_bwd)
    t = C.splice(h_last_fwd, h_first_bwd)

    z1 = Dense(size[2], activation=C.relu)(t)
    z = Dense(2, activation=None)(z1)

    filename = str(tmpdir / 'test_large_model_serialization_float.out')
    z.save(filename)

    assert os.path.getsize(filename) > two_gb

    y = C.Function.load(filename, device=device)

    assert len(z.parameters) == len(y.parameters)

    for p_original, p_loaded in zip(z.parameters, y.parameters):
        assert p_original.shape == p_loaded.shape
        assert np.allclose(p_original.value, p_loaded.value)
Example #12
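# Constructor excerpt from a model class; assumes module-level imports such as:
# import cntk as C
# from cntk.layers import Sequential, Dense, Recurrence, LSTM
# loss_fun is a project-specific loss helper defined elsewhere.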
    def __init__(self, n_in, n_out, init_lr, momentum):

        self.param1 = 512
        self.param2 = 256

        self.n_in = int(n_in)
        self.n_out = int(n_out)
        self.input = C.sequence.input_variable(shape=(self.n_in, ))
        self.label = C.sequence.input_variable(shape=(self.n_out, ))

        self.three_dnn = Sequential([
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh),
            Dense(self.param1, activation=C.tanh)
        ])
        self.rnn_layer1 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.rnn_layer2 = Sequential([(Recurrence(LSTM(self.param2)),
                                       Recurrence(LSTM(self.param2),
                                                  go_backwards=True)),
                                      C.splice])
        self.final_dnn = Dense(self.n_out)

        self.output = self.model(self.input)

        self.loss = loss_fun(self.output, self.label)
        self.eval_err = loss_fun(self.output, self.label)  # evaluation metric reuses the training loss

        self.lr_s = C.learning_rate_schedule(init_lr, C.UnitType.sample)
        self.mom_s = C.momentum_schedule(momentum)
        self.learner = C.momentum_sgd(self.output.parameters,
                                      lr=self.lr_s,
                                      momentum=self.mom_s)
        self.trainer = C.Trainer(self.output, (self.loss, self.eval_err),
                                 [self.learner])
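Example #13
# Assumes module-level imports and config (context, feature_dim, num_classes):
# from cntk import sequence, cross_entropy_with_softmax, classification_error
# from cntk.layers import Sequential, For, Recurrence, LSTM, Dense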
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = sequence.input_variable(((2*context+1)*feature_dim))
    labels = sequence.input_variable((num_classes))

    # create network
    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce' : ce,
        'errs' : errs,
        'output': z
    }
Example #14
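# Snippet from a character-level language-model script; the first lines are the
# tail of its get_sample() helper. Assumes module-level imports such as numpy,
# cntk, the layer factories (Sequential, For, Stabilizer, Recurrence, LSTM,
# Dense), and the learner helpers used below.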
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]

    return [X], [Y]


get_sample(0)

input_sequence = sequence.input_variable(shape=vocab_size)
label_sequence = sequence.input_variable(shape=vocab_size)

model = Sequential([
    For(
        range(2), lambda: Sequential(
            [Stabilizer(),
             Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])

z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(
    z.parameters, lr_per_sample, momentum_time_constant,
    gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
    gradient_clipping_with_truncation=gradient_clipping_with_truncation)
Example #15
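# Snippet from a very similar character-level language-model script; the first
# lines are again the tail of a sampling helper. Assumes the same module-level
# imports as Example #14, plus Axis and ProgressPrinter.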
    X = np.eye(vocab_size, dtype=np.float32)[xi]
    Y = np.eye(vocab_size, dtype=np.float32)[yi]

    return [X], [Y]
sample(0)


input_seq_axis = Axis('inputAxis')
input_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis)
label_sequence = sequence.input_variable(shape=vocab_size, sequence_axis=input_seq_axis)

# model = Sequential([Dense(300),Dense(vocab_size)])

model = Sequential([
    For(range(2), lambda:
        Sequential([Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)])

z = model(input_sequence)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                       gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
Example #16
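# Minimal imports for this snippet:
import cntk as C
from cntk.layers import Recurrence, LSTM, Dense
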
def model_lstm(input_tensor, hidden_dim):
    hidden = Recurrence(LSTM(hidden_dim))(input_tensor)
    prediction = Dense(1)(C.sequence.last(hidden))
    return prediction
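
# A minimal usage sketch (dimensions and data here are illustrative assumptions):
import numpy as np
x = C.sequence.input_variable(5)  # sequences of 5-dim frames
p = model_lstm(x, hidden_dim=32)  # one scalar prediction per sequence
out = p.eval({x: [np.random.random((7, 5)).astype(np.float32)]})  # one 7-step sequence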