Beispiel #1
0
    def attention(query, key, value):
        dk = C.reduce_sum(C.ones_like(query))  # cannot use sequence.last, will conflict with recurrence
        # dk: [#, *] [1, ] and value = int(dim_of_query)

        unpacked_key = C.sequence.unpack(key, padding_value=0, no_mask_output=True)  # [#] [-3, key_dim]
        unpacked_value = C.sequence.unpack(value, padding_value=0, no_mask_output=True)  # [#] [-3, value_dim]

        broadcasted_key = C.sequence.broadcast_as(unpacked_key, query)  # [#, *] [-3, key_dim]
        scaled = C.times_transpose(query, broadcasted_key) / dk
        # [#, *] [q_dim] @ [#, *] [key_dim, -3], assert q_dim == key_dim
        # scaled: [#, *] [-3, ] => for every key seq element, there is a corresponding score

        # masked out invalid temporal connections to obey_sequence_order
        if obey_sequence_order and max_seq_len:
            unpacked_scaled, scaled_mask = C.sequence.unpack(scaled, padding_value=0).outputs
            # unpacked_scaled: [#] [-3, -3]  <== matrix will be top right diagonally zero-ed
            # scaled_mask: [#] [-3,]

            minus_inf = C.constant(-1e+30)
            valid_connections = C.Constant(np.tril(np.ones((max_seq_len, max_seq_len)), k=0))  # [] [max_seq, max_seq]
            valid_connections = C.reconcile_dynamic_axes(valid_connections, unpacked_scaled)  # [#] [max_seq, max_seq]
            valid_connections = C.crop_manual(valid_connections, unpacked_scaled, 0, 0)  # [#] [-3, -3]
            unpacked_scaled = C.element_select(valid_connections, unpacked_scaled, minus_inf)  # [#] [-3, -3]
            scaled = C.to_sequence_like(unpacked_scaled, query)  # [#, *] [-3]

        elif obey_sequence_order and not max_seq_len:
            raise ValueError("max_seq_len must be defined when obey_sequence_order is True")

        attended = C.times(C.softmax(scaled, axis=-1), C.sequence.broadcast_as(unpacked_value, query))  # [#, *] [value_dim,]
        return attended
Beispiel #2
0
def triangular_matrix_seq(mode: int = 1):
    X = C.placeholder(1)
    ones = C.ones_like(X[0])
    perm_1 = C.layers.Recurrence(C.plus, return_full_state=True)(ones)
    perm_2 = C.layers.Recurrence(C.plus,
                                 go_backwards=True,
                                 return_full_state=True)(ones)

    arr_1 = C.sequence.unpack(perm_1, 0, True)
    arr_2 = C.sequence.unpack(perm_2, 0, True)

    mat = C.times_transpose(arr_1, arr_2)
    mat_c = arr_1 * arr_2

    diagonal_mat = mat - mat_c

    final_mat = diagonal_mat
    if mode == 0:
        final_mat = C.equal(final_mat, 0)
    elif mode == 1:
        final_mat = C.less_equal(final_mat, 0)
    elif mode == 2:
        final_mat = C.less(final_mat, 0)
    elif mode == -1:
        final_mat = C.greater_equal(final_mat, 0)
    elif mode == -2:
        final_mat = C.greater(final_mat, 0)

    result = C.as_block(final_mat, [(X, X)], 'triangular_matrix')

    return C.stop_gradient(result)
Beispiel #3
0
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels

    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(
        encoder.classes_))  # number of classes = 4
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(
        C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)

    fb = C.forward_backward(labels_graph,
                            prediction_tensor,
                            blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must be the same length as the sequence length in network_out

    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})

    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb, ), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor:
            [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })

        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})
    assert encoder.network_output_to_labels(result[0],
                                            squash_repeat=True) == ground_truth
Beispiel #4
0
def positional_encoding(token_dims: int, discount_factor: float = 0.99):
    X = C.placeholder(token_dims, name='positional_encoding')
    encoder = C.layers.Recurrence(C.element_times,
                                  initial_state=1,
                                  return_full_state=True)(C.ones_like(X) *
                                                          discount_factor)
    return C.stop_gradient(
        C.as_block(encoder, [(X, X)], 'positional_encoding',
                   'positional_encoding_'))
Beispiel #5
0
def true_density(z):
    z1, z2 = z[0], z[1]

    w1 = lambda x: C.sin(2 * np.pi * x/4)
    u = 0.5 * C.square((z2 - w1(z1))/0.4)
    dummy = C.ones_like(u) * 1e7

    # u = C.element_select(C.less_equal(z1,4), u, dummy)
    cond = C.less_equal(z1,4)
    u = C.element_select(cond, u, dummy) # u = cond*u + (1-cond)*dummy

    return C.exp(-u)
Beispiel #6
0
def __cntk_cov2__(m):
    m = C.reshape(m, -1)
    m = C.unpack_batch(m)

    m = C.transpose(m, [1, 0])

    count = C.reduce_sum(C.reduce_mean(C.ones_like(m), axis=0))

    fact = 1.0 / (count - 1)
    m -= C.reduce_mean(m, axis=1)
    mt = C.transpose(m, [1, 0])
    return fact * C.squeeze(m @ mt)
Beispiel #7
0
def cgan_discriminator(x, y):
    with C.layers.default_options(init=C.normal(scale=0.02), map_rank=1, use_cntk_engine=True):
        hx = C.reshape(x, (1, 28, 28))
        hy = C.ones_like(hx) * C.reshape(y, (label_dim, 1, 1))
        h = C.splice(hx, hy, axis=0)

        h = C.leaky_relu((Convolution2D((5, 5), 1, strides=(2, 2))(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((5, 5), 64, strides=(2, 2))(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Dense(1024)(h)), alpha=0.2)

        h = Dense(1, activation=C.sigmoid)(h)

    return h
Beispiel #8
0
 def inner(x):
     mask = dropout(C.ones_like(C.sequence.first(x)))
     mask = C.sequence.broadcast_as(mask, x)
     return mask * x
Beispiel #9
0
 def inner(a):
     # reconcile_dynamic_axes is necessary to avoid subtle bugs e.g. sequence.where and one_hot
     return C.reconcile_dynamic_axes(C.sequence.where(C.ones_like(Cx.scalar(a))), a)