Example #1
def gaussian_mdn_coeff(x, nmix: int, ndim: int):
    """
    Extracts the coefficients of a Gaussian mixture density network.
    Assumes independence between the Gaussian dimensions.

    Example:
        ndim, nmix = 1, 3
        a = C.input_variable(ndim)
        prediction = Dense((ndim + 2) * nmix)(a)
        coeffs = C.combine(gaussian_mdn_coeff(prediction, nmix=nmix, ndim=ndim)).eval({a: x})

        alpha, mu, sigma = coeffs.values()

    Arguments:
        x: input tensor
        nmix (int): number of mixture components
        ndim (int): dimensionality of each Gaussian

    Returns:
        tuple of (alpha, mu, sigma)

    """

    if len(x.shape) != 1:
        raise ValueError("Must be a 1d tensor, but input has shape {0}".format(
            x.shape))

    alpha = C.softmax(C.slice(x, 0, 0, nmix), name='alpha')
    sigma = C.exp(
        C.slice(x, 0, nmix, 2 * nmix), name='sigma'
    )  # one shared variance per mixture component (isotropic Gaussians)
    mu = C.reshape(C.slice(x, 0, 2 * nmix, (ndim + 2) * nmix),
                   shape=(nmix, ndim),
                   name='mu')
    return alpha, mu, sigma
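
For reference, a minimal NumPy sketch (not from the original source; the nmix and ndim values are illustrative assumptions) of how the (ndim + 2) * nmix prediction vector is laid out:

import numpy as np

nmix, ndim = 3, 2
out = np.random.randn((ndim + 2) * nmix).astype(np.float32)

alpha_raw = out[:nmix]                   # mixture weights, pre-softmax
sigma_raw = out[nmix:2 * nmix]           # per-component scales, pre-exp
mu = out[2 * nmix:].reshape(nmix, ndim)  # component means

alpha = np.exp(alpha_raw) / np.exp(alpha_raw).sum()  # softmax
sigma = np.exp(sigma_raw)                            # strictly positive
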
Example #2
    def inner(query, key, value):
        mixed_queries = query_linear(query)  # [#, *] [model_dim,]
        mixed_keys = key_linear(key)  # [#, *] [model_dim,]
        mixed_values = value_linear(value)  # [#, *] [model_dim,]

        # TODO: re-implement `ScaledDotProductAttention` when cntk has BatchMatMul so there's no need to slice here
        queries = [
            C.slice(mixed_queries, 0, i * head_dim, (i + 1) * head_dim)
            for i in range(num_heads)
        ]
        keys = [
            C.slice(mixed_keys, 0, i * head_dim, (i + 1) * head_dim)
            for i in range(num_heads)
        ]
        values = [
            C.slice(mixed_values, 0, i * head_dim, (i + 1) * head_dim)
            for i in range(num_heads)
        ]

        # list of num_heads heads with shape (-3, head_dim) each
        attention_outputs = [
            scaled_dot_product_attention(q, k, v)
            for q, k, v in zip(queries, keys, values)
        ]

        result = multihead_liner(C.splice(*attention_outputs))
        return result
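
Since each head occupies a contiguous head_dim block, the per-head slicing above amounts to a single reshape. A small NumPy sketch (dimensions are assumed for illustration):

import numpy as np

model_dim, num_heads = 8, 2
head_dim = model_dim // num_heads
mixed = np.random.randn(model_dim).astype(np.float32)

heads = mixed.reshape(num_heads, head_dim)  # one reshape instead of num_heads slices
assert np.allclose(heads[1], mixed[1 * head_dim:2 * head_dim])
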
Example #3
def create_model(input_dim):
    row = sequence.input_variable(shape=input_dim)
    col = sequence.input_variable(shape=input_dim)
    rowh = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])(row)
    colh = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])(col)

    x = C.splice(rowh, colh, axis=-1)
    x = lightlstm(opt.embed, opt.nhid)(x)
    x = For(range(opt.layer-1), lambda: lightlstm(opt.nhid, opt.nhid))(x)
    rowh = C.slice(x, -1, opt.nhid * 0, opt.nhid * 1)
    colh = C.slice(x, -1, opt.nhid * 1, opt.nhid * 2)

    row_predict = Sequential([Dropout(opt.dropout), Dense(input_dim)])(rowh)
    col_predict = Sequential([Dropout(opt.dropout), Dense(input_dim)])(colh)

    # variable : row label and col label
    row_label = sequence.input_variable(shape=input_dim)
    col_label = sequence.input_variable(shape=input_dim)
    model = C.combine([row_predict, col_predict])

    return {'row':       row,
            'col':       col,
            'row_label': row_label,
            'col_label': col_label,
            'model':     model}
Example #4
def create_network():
    # Create the input and target variables
    input_var = cntk.input_variable(
        (sequence_length, frame_height, frame_width), name='input_var')
    target_var = cntk.input_variable((num_classes, ),
                                     is_sparse=True,
                                     name='target_var')

    input_head = cntk.slice(input_var, axis=0, begin_index=0, end_index=19)
    input_tail = cntk.slice(input_var, axis=0, begin_index=1, end_index=20)
    diff = input_tail - input_head

    model = Sequential([
        resnet_model(cntk.placeholder()),
        Label('resnet'),
        Dense(num_classes, name='output')
    ])(diff)

    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': cntk.cross_entropy_with_softmax(model, target_var),
        'metric': cntk.classification_error(model, target_var)
    }
Example #5
    def createDecoderNetwork(self, networkHiddenSrc, srcLength, trgLength):
        timeZeroHidden = C.slice(networkHiddenSrc, 0, 0, 1)
        srcSentEmb = C.slice(timeZeroHidden, -1, Config.SrcHiddenSize,
                             Config.SrcHiddenSize * 2)
        networkHiddenTrg = {}
        inputTrg = C.reshape(self.inputMatrixTrg,
                             shape=(Config.TrgMaxLength, Config.BatchSize,
                                    Config.TrgVocabSize))
        attProbAll = []
        tce = 0
        for i in range(trgLength):
            preTrgEmb = self.initTrgEmb if i == 0 else self.EmbTrg(inputTrg[i - 1])

            if i == 0:
                networkHiddenTrg[i] = self.createDecoderInitNetwork(srcSentEmb)
            else:
                (networkHiddenTrg[i], attProb) = self.createDecoderRNNNetwork(
                    networkHiddenSrc, preTrgEmb, networkHiddenTrg[i - 1],
                    srcLength)
                attProbAll = attProb if i == 1 else C.splice(
                    attProbAll, attProb, axis=0)

            preSoftmax = self.createReadOutNetwork(networkHiddenTrg[i],
                                                   preTrgEmb)
            ce = C.cross_entropy_with_softmax(preSoftmax, inputTrg[i], 2)
            ce = C.reshape(ce, shape=(1, Config.BatchSize))
            tce += C.times_transpose(ce, self.maskMatrixTrg[i])

        return tce
Example #7
def lightlstm(input_dim, cell_dim):
    x = C.placeholder(name='x')
    dh = C.placeholder(name='dh')
    dc = C.placeholder(name='dc')
    x1 = C.slice(x, -1, input_dim * 0, input_dim * 1)
    x2 = C.slice(x, -1, input_dim * 1, input_dim * 2)

    def LSTMCell(x, y, dh, dc):
        '''LightLSTM Cell'''

        b = C.parameter(shape=(4 * cell_dim), init=0)
        W = C.parameter(shape=(input_dim, 4 * cell_dim), init=glorot_uniform())
        H = C.parameter(shape=(cell_dim, 4 * cell_dim), init=glorot_uniform())

        # projected contribution from input x, hidden, and bias
        proj4 = b + C.times(x, W) + C.times(dh, H)

        it_proj = C.slice(proj4, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj = C.slice(proj4, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj = C.slice(proj4, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj = C.slice(proj4, -1, 3 * cell_dim, 4 * cell_dim)

        it = C.sigmoid(it_proj)  # input gate
        bit = it * C.tanh(bit_proj)

        ft = C.sigmoid(ft_proj)  # forget gate
        bft = ft * dc

        ct = bft + bit
        ot = C.sigmoid(ot_proj)  # output gate
        ht = ot * C.tanh(ct)

        # projected contribution from input y, hidden, and bias
        proj4_2 = b + C.times(y, W) + C.times(ht, H)

        it_proj_2 = C.slice(proj4_2, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj_2 = C.slice(proj4_2, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj_2 = C.slice(proj4_2, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj_2 = C.slice(proj4_2, -1, 3 * cell_dim, 4 * cell_dim)

        it_2 = C.sigmoid(it_proj_2)  # input gate
        bit_2 = it_2 * C.tanh(bit_proj_2)

        ft_2 = C.sigmoid(ft_proj_2)  # forget gate
        bft_2 = ft_2 * ct

        ct2 = bft_2 + bit_2
        ot_2 = C.sigmoid(ot_proj_2)  # output gate
        ht2 = ot_2 * C.tanh(ct2)
        return (ht, ct, ht2, ct2)

    Cell = LSTMCell(x1, x2, dh, dc)

    actualDh = C.past_value(Cell[2])
    actualDc = C.past_value(Cell[3])

    Cell[0].replace_placeholders(
        {dh: actualDh.output, dc: actualDc.output})
    return C.splice(Cell[0], Cell[2], axis=-1)
Example #8
    def inner(a):
        values, valid = C.sequence.unpack(a, padding_value=0).outputs
        values_reversed = C.slice(values, 0, 0, 0, -1)
        valid_reversed = C.slice(valid, 0, 0, 0, -1)

        values_seq = C.to_sequence(values_reversed)
        valid_seq = C.to_sequence(C.expand_dims(valid_reversed, axis=-1))
        a_reversed = C.sequence.gather(values_seq, valid_seq)
        return a_reversed
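
The trick above unpacks the sequence into a padded tensor, reverses it with a stride of -1, and gathers with the reversed validity mask so that padding is dropped and only real steps survive. A NumPy sketch of the same idea (shapes assumed):

import numpy as np

values = np.arange(12, dtype=np.float32).reshape(4, 3)  # unpacked: (max_len, dim)
valid = np.array([1, 1, 1, 0], dtype=np.float32)        # last step is padding

values_reversed = values[::-1]  # what C.slice(values, 0, 0, 0, -1) computes
valid_reversed = valid[::-1]
reversed_seq = values_reversed[valid_reversed.astype(bool)]  # gather valid steps
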
Example #9
def test_slice_attributes():
    x = C.input_variable((2,3))
    f = C.slice(x, 0, 1, 2)
    d = f.root_function.attributes
    expected = {'endIndex': 2, 'beginIndex': 1, 'axis': ('ordered', 'static', 1), 'sliceStrides': 1}
    _check(expected, d)
    f = C.slice(x, [0,1], [1,0], [2,2], [-1,1])
    d = f.root_function.attributes
    expected = {'endIndexVec': [2,2], 'beginIndexVec': [1,0], 'axisVec': [('ordered', 'static', 1), ('ordered', 'static', 0)], 'sliceStridesVec': [-1, 1]}
    _check(expected, d)
Example #10
    def gaussian_windows_attention_coefficients(abk, nb_mixtures):
        """ Split into 3 equal tensor of dim nb_mixtures """
        a = C.slice(abk, 0, 0, nb_mixtures)
        b = C.slice(abk, 0, nb_mixtures, 2 * nb_mixtures)
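        # an end_index of 0 slices through to the end of the axis,
        # so k below is the final nb_mixtures entries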
        k = C.slice(abk, 0, 2 * nb_mixtures, 0)
        k = Recurrence(C.plus)(k)

        a = C.expand_dims(a, axis=-1)
        b = C.expand_dims(b, axis=-1)
        k = C.expand_dims(k, axis=-1)
        return a, b, k
Example #11
def test_Slice(tmpdir):
    data = np.asarray([[1, 2, -3], [4, 5, 6]], dtype=np.float32)
    x1 = C.input_variable((2, 3))

    model = C.slice(data, 0, 1, 2)
    verify_no_input(model, tmpdir, 'Slice_0')

    model = C.slice(x1, 0, 1, 2)
    verify_one_input(model, data, tmpdir, 'Slice_1')

    model = C.slice(x1, [0, 1], [1, 0], [2, 1])
    verify_one_input(model, data, tmpdir, 'Slice2_1')
Example #13
def test_Slice(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([[1, 2, -3], [4, 5, 6]], dtype=dtype)
        x1 = C.input_variable((2, 3))

        model = C.slice(data, 0, 1, 2)
        verify_no_input(model, tmpdir, 'Slice_0')

        model = C.slice(x1, 0, 1, 2)
        verify_one_input(model, data, tmpdir, 'Slice_1')

        model = C.slice(x1, [0, 1], [1, 0], [2, 1])
        verify_one_input(model, data, tmpdir, 'Slice2_1')
Example #15
def test_op_slice_sequence(input_data, slice_params, expected_result, device_id, precision):
    # Forward pass test
    #==================
    # We compute the expected output for the forward pass.
    # We need two surrounding brackets:
    # The first for sequences (length=1, since we have dynamic_axis='').
    # The second for batch of one sample.

    # 1 sample with 2 sequence element of a vector of 3

    t = C.dynamic_axis(name='t')
    a = I([input_data], dynamic_axis=t)

    # slice using the operator
    result = C.slice(a, slice_params[0], slice_params[1], axis='t')
    result = C.identity(result) # required hack because Slice doesn't propagate tag

    unittest_helper(result, None, [expected_result], device_id=device_id,
                    precision=precision, clean_up=False, backward_pass=False)

    # Backward pass test
    # ==================
    # The gradient of the slice operator is a tensor of the same shape as the
    # input tensor, having 1 for elements that were taken and 0 for elements
    # that were dropped.

    def grad_slice(x, beg_index, end_index):
        res = np.zeros_like(x)
        res[beg_index:end_index] = 1
        return res

    expected_gradient = grad_slice(np.asarray(input_data), *slice_params)
    
    unittest_helper(result, None, [expected_gradient], device_id=device_id,
                    precision=precision, clean_up=True, backward_pass=True,
                    input_node=a)
Example #16
 def process_history(hist, inp):
     wk = C.slice(hist, 0, 0, myConfig['wg_dim'])
     wn = hist[myConfig['wg_dim']:]
     hist_processed = embed_layer(wk, wn)
     out_logits = s2smodel(hist_processed, inp)
     hamax = C.reshape(C.hardmax(out_logits), (-1, ))
     return hamax
Example #17
def test_op_slice(input_data, slice_params, expected_result, device_id, precision):
    # Forward pass test
    #==================
    # We compute the expected output for the forward pass.
    # We need two surrounding brackets:
    # The first for sequences (length=1, since we have dynamic_axis='').
    # The second for batch of one sample.

    a = I([input_data])
    def op_slice(x, beg_index, end_index, axis):
        return x[beg_index:end_index]

    def _ax_slices(x, beg_index, end_index, axis):
        '''
        Creates a NumPy slicing array from slice operator's arguments
        '''
        ax_slices = []
        for i in range(0, len(x.shape)):
            if i==axis:
                if end_index >= x.shape[i]:
                    ax_slices.append([beg_index,])
                else:
                    ax_slices.append([beg_index,end_index])
            else:
                ax_slices.append(slice(None)) # corresponds to ':'
        return ax_slices


    # slice using the operator
    result = C.slice(a, *slice_params)

    unittest_helper(result, None, [[expected_result]], device_id=device_id,
                    precision=precision, clean_up=True, backward_pass=False)

    # slice using the overload
    ax_slices = _ax_slices(a, *slice_params)
    result = a[ax_slices]

    unittest_helper(result, None, [[expected_result]], device_id=device_id,
                    precision=precision, clean_up=True, backward_pass=False)
    # Backward pass test
    # ==================
    # The gradient of the slice operator is a tensor of the same shape as the
    # input tensor, having 1 for elements that were taken and 0 for elements
    # that were dropped.

    def grad_slice(x, beg_index, end_index, axis):
        res = np.zeros_like(x)
        ax_slices = _ax_slices(x, beg_index, end_index, axis)
        res[ax_slices] = x[ax_slices]
        res[res!=0] = 1
        return res

    expected_gradient = grad_slice(np.asarray(input_data), *slice_params)
    
    unittest_helper(result, None, [[expected_gradient]], device_id=device_id,
                    precision=precision, clean_up=True, backward_pass=True,
                    input_node=a)
Example #18
def test_slice_attributes():
    x = C.input((2, 3))
    f = C.slice(x, 0, 1, 2)
    d = f.root_function.attributes
    expected = {
        'endIndex': 2,
        'beginIndex': 1,
        'axis': ('ordered', 'static', 1)
    }
    _check(expected, d)
    f = C.slice(x, [0, 1], [1, 0], [2, 2])
    d = f.root_function.attributes
    expected = {
        'endIndexVec': [2, 2],
        'beginIndexVec': [1, 0],
        'axisVec': [('ordered', 'static', 1), ('ordered', 'static', 0)]
    }
    _check(expected, d)
Example #19
def test_slice_attributes():
    x = C.input_variable((2, 3))
    f = C.slice(x, 0, 1, 2)
    d = f.root_function.attributes
    expected = {
        'endIndex': 2,
        'beginIndex': 1,
        'axis': ('ordered', 'static', 1)
    }
    _check(expected, d)
Example #20
 def createDecodingNetworks(self, srcHiddenStates, trgPreWord, trgPreHidden,
                            srcLength):
     preTrgEmb = self.EmbTrg(trgPreWord)
     (decoderHidden, attProb) = self.createDecoderRNNNetwork(
         C.slice(srcHiddenStates, 0, 0, srcLength), preTrgEmb, trgPreHidden,
         srcLength)
     preSoftmax = self.createReadOutNetwork(decoderHidden, preTrgEmb)
     decoderPredict = self.createPredictionNetwork(preSoftmax)
     decoderPredictNet = C.combine(decoderHidden, decoderPredict)
     return (decoderPredictNet,
             [decoderHidden.output, decoderPredict.output])
Example #21
def criteria(label, output, block_size, c_classes, weights):
    ''' Define the loss function and metric '''
    probs = cntk.softmax(output, axis=0)
    log_probs = cntk.log(probs)
    ce = cntk.times(weights,
                    -cntk.element_times(log_probs, label),
                    output_rank=2)
    mean_ce = cntk.reduce_mean(ce)
    _, w, h = label.shape
    pe = cntk.classification_error(probs, label, axis=0) - \
        cntk.reduce_sum(cntk.slice(label, 0, 0, 1)) / cntk.reduce_sum(label)
    return (mean_ce, pe)
Example #22
 def cnwindow(mna, window):
     mnas = mna.shape
     mnout = (*mnas[:-2], *window,
              (mnas[-2] - window[-2]) + 1, (mnas[-1] - window[-1]) + 1)
     mne2 = None
     for R in range(window[0]):
         j_lim = R + mnout[-2]
         for H in range(window[1]):
             tdata = C.slice(mna, [-2, -1], [R, H], [j_lim, H + mnout[-1]])
             if mne2 is None:
                 mne2 = tdata
             else:
                 mne2 = C.splice(mne2, tdata, axis=1)
     return C.reshape(
         C.transpose(C.reshape(mne2, shape=mnout), (0, 5, 4, 3, 2, 1)),
         (mnout[0], *mnout[5:3:-1], 1, *mnout[3:0:-1]))
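
The loop above enumerates every window offset with C.slice and stacks the results with C.splice. NumPy's sliding_window_view (NumPy 1.20+) performs the same extraction in one call; this is a rough analogue only, since the axis order differs from cnwindow's final transpose/reshape:

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

a = np.arange(2 * 1 * 4 * 4, dtype=np.float32).reshape(2, 1, 4, 4)
patches = sliding_window_view(a, (2, 2), axis=(-2, -1))
print(patches.shape)  # (2, 1, 3, 3, 2, 2): every 2x2 window of each 4x4 map
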
Example #23
def resu_model(input, num_stack_layers, c_map, num_classes, block_size):
    r = cntk.slice(input, 0, 0, 1)
    g = cntk.slice(input, 0, 1, 2)
    b = cntk.slice(input, 0, 2, 3)
    i = cntk.slice(input, 0, 3, 4)

    r -= reduce_mean(r)
    g -= reduce_mean(g)
    b -= reduce_mean(b)
    #i -= reduce_mean(i)

    input_do = splice(splice(splice(r, g, axis=0), b, axis=0), i, axis=0)

    conv = conv_bn(input_do, (3, 3), c_map[0])

    r1 = resnet_basic_stack(conv, num_stack_layers, c_map[0])

    r2_1 = resnet_basic_inc(r1, c_map[1])
    r2_2 = resnet_basic_stack(r2_1, num_stack_layers-1, c_map[1])
    
    r3_1 = resnet_basic_inc(r2_2, c_map[2])
    r3_2 = resnet_basic_stack(r3_1, num_stack_layers-1, c_map[2])

    r4_1 = resnet_basic_inc(r3_2, c_map[3])
    r4_2 = resnet_basic_stack(r4_1, num_stack_layers-1, c_map[3])

    r4_us = layers.ConvolutionTranspose((3, 3), c_map[3], strides=2, output_shape=(block_size // 4, block_size // 4), pad=True, bias=False, init=bilinear(3, 3))(r4_2)

    o3 = relu(layers.Convolution((1, 1), c_map[2])(r3_2) + layers.Convolution((1, 1), c_map[2])(r4_us))
    o3_us = layers.ConvolutionTranspose((3, 3), c_map[2], strides=2, output_shape=(block_size // 2, block_size // 2), pad=True, bias=False, init=bilinear(3, 3))(o3)

    o2 = relu(layers.Convolution((1, 1), c_map[1])(r2_2) + layers.Convolution((1, 1), c_map[1])(o3_us))
    o2_us = layers.ConvolutionTranspose((3, 3), c_map[1], strides=2, output_shape=(block_size, block_size), pad=True, bias=False, init=bilinear(3, 3))(o2)

    o1 = relu(layers.Convolution((3, 3), c_map[0], pad=True)(input_do) + layers.Convolution((1, 1), c_map[0])(r1) + layers.Convolution((1, 1), c_map[0])(o2_us))

    return layers.Convolution((3, 3), num_classes, pad=True, activation=relu)(o1)
Example #26
def slice(x, axis, begin_index, end_index, name=''): 
    '''
    Slice the input along an axis.    

    Examples:
        >>> # create 2x3 matrix in a sequence of length 1 in a batch of one sample
        >>> data = np.asarray([[[1, 2, -3],
        ...                     [4, 5,  6]]])
        >>> x = C.input_numpy(data)
        >>> # slice index 1 (second) at first axis
        >>> C.eval(C.slice(x, 1, 2, 0))
        [array([[[ 4.,  5.,  6.]]])]
        >>> # slice index 0 (first) at second axis
        >>> C.eval(C.slice(x, 0, 1, 1))
        [array([[[ 1.],
                 [ 4.]]])]        

    NumPy's way of slicing works, too:

    Examples:
        >>> C.eval(x[1])
        [array([[[ 4.,  5.,  6.]]])]
        >>> C.eval(x[:,:2,:])
        [array([[[ 1.,  2.],
                 [ 4.,  5.]]])]

    Args:
        x: input tensor
        axis (:class:`cntk.Axis`): axis along which `begin_index` and `end_index` will be used. 
        begin_index (int): the index along axis where the slicing starts
        end_index (int): the index along axis where the slicing ends        
        name (str): the name of the node in the network
        
    See also:
        Indexing in NumPy: http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import slice
    x = sanitize_input(x)
    return slice(x, axis, begin_index, end_index, name).output()     
Example #28
    def createNetwork(self, inputEmb, preHidden, preMem):
        WX = C.times(inputEmb, self.W) + self.Wb
        UH = C.times(preHidden, self.U) + self.Ub

        I = C.sigmoid(
            C.slice(WX, -1, 0, self.hiddenSize) +
            C.slice(UH, -1, 0, self.hiddenSize))
        O = C.sigmoid(
            C.slice(WX, -1, self.hiddenSize, self.hiddenSize * 2) +
            C.slice(UH, -1, self.hiddenSize, self.hiddenSize * 2))
        F = C.sigmoid(
            C.slice(WX, -1, self.hiddenSize * 2, self.hiddenSize * 3) +
            C.slice(UH, -1, self.hiddenSize * 2, self.hiddenSize * 3))
        N = C.tanh(
            C.slice(WX, -1, self.hiddenSize * 3, self.hiddenSize * 4) +
            C.slice(UH, -1, self.hiddenSize * 3, self.hiddenSize * 4))

        NI = C.element_times(N, I)
        FM = C.element_times(F, preMem)
        CurMem = NI + FM
        CurH = C.element_times(C.tanh(CurMem), O)
        return (CurH, CurMem)
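
The gate slices of WX and UH above are just equal quarters of the stacked 4 * hiddenSize projection. For reference, a NumPy sketch (hidden size assumed):

import numpy as np

hidden_size = 4
WX = np.random.randn(4 * hidden_size).astype(np.float32)

# equivalent to the four C.slice(WX, -1, k * hiddenSize, (k + 1) * hiddenSize) calls
i_part, o_part, f_part, n_part = np.split(WX, 4)
assert np.allclose(o_part, WX[hidden_size:2 * hidden_size])
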
Example #29
def test_op_slice_sequence(input_data, slice_params, expected_result,
                           device_id, precision):
    input_data = AA(input_data, dtype=PRECISION_TO_TYPE[precision])

    t = C.Axis.new_unique_dynamic_axis('t')
    sample_shape = input_data.shape[1:]
    a = I(shape=sample_shape,
          data_type=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]),
          needs_gradient=True,
          dynamic_axes=[C.Axis.default_batch_axis(), t],
          name='a')

    result = C.slice(a,
                     axis=t,
                     begin_index=slice_params[0],
                     end_index=slice_params[1])

    def grad_slice(x, beg_index, end_index):
        res = np.zeros_like(x)
        res[beg_index:end_index] = 1
        return res

    expected_gradient = grad_slice(np.asarray(input_data), *slice_params)

    expected_forward = AA([expected_result],
                          dtype=PRECISION_TO_TYPE[precision])
    expected_backward = {
        a: [grad_slice(np.asarray(input_data), *slice_params)]
    }

    # create batch
    input_data.shape = (1, ) + input_data.shape

    forward_input = {a: input_data}
    unittest_helper(result,
                    forward_input,
                    expected_forward,
                    expected_backward,
                    device_id=device_id,
                    precision=precision)
Example #30
    def createNetwork(self, inputEmb, preHidden):
        WX = C.times(inputEmb, self.W) + self.Wb
        UH = C.times(preHidden, self.U) + self.Ub

        R = C.sigmoid(
            C.slice(WX, -1, 0, self.hiddenSize) +
            C.slice(UH, -1, 0, self.hiddenSize))
        Z = C.sigmoid(
            C.slice(WX, -1, self.hiddenSize, self.hiddenSize * 2) +
            C.slice(UH, -1, self.hiddenSize, self.hiddenSize * 2))

        UHR = C.element_times(
            C.slice(UH, -1, self.hiddenSize * 2, self.hiddenSize * 3), R)
        HTilde = C.tanh(
            C.slice(WX, -1, self.hiddenSize * 2, self.hiddenSize * 3) + UHR)

        CurH = C.element_times(HTilde, 1 - Z) + C.element_times(preHidden, Z)
        return CurH
Example #31
    def createAttentionNet(self, hiddenSrc, curHiddenTrg, srcLength):
        srcHiddenSize = Config.SrcHiddenSize * 2
        hsw = C.times(hiddenSrc, self.Was)
        htw = C.times(curHiddenTrg, self.Wat)
        hst = C.reshape(
            hsw, shape=(srcLength, Config.BatchSize * Config.TrgHiddenSize)
        ) + C.reshape(htw, shape=(1, Config.BatchSize * Config.TrgHiddenSize))
        hstT = C.reshape(C.tanh(hst),
                         shape=(srcLength * Config.BatchSize,
                                Config.TrgHiddenSize))
        attScore = C.reshape(C.times(hstT, self.Wav),
                             shape=(srcLength, Config.BatchSize))
        maskOut = (C.slice(self.maskMatrixSrc, 0, 0, srcLength) - 1) * 99999999
        nAttScore = attScore + maskOut
        attProb = C.reshape(C.softmax(nAttScore, axis=0),
                            shape=(srcLength, Config.BatchSize, 1))
        attVector = hiddenSrc * attProb
        contextVector = C.reduce_sum(C.reshape(
            attVector, shape=(srcLength, Config.BatchSize * srcHiddenSize)),
                                     axis=0)
        contextVector = C.reshape(contextVector,
                                  shape=(1, Config.BatchSize, srcHiddenSize))

        return (contextVector, attProb)
Example #32
 def inner(a):
     return C.slice(C.reshape(a, (-1, )), 0, 0, 1)
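
That is, flatten and keep the first element. The NumPy equivalent (a sketch, not from the source):

import numpy as np

a = np.arange(6, dtype=np.float32).reshape(2, 3)
first = a.reshape(-1)[0:1]  # mirrors C.slice(C.reshape(a, (-1, )), 0, 0, 1)
assert first.shape == (1,) and first[0] == a.flat[0]
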
Example #33
def test_Slice(tmpdir):
    data = np.asarray([[[1, 2, -3], [4, 5, 6]]], dtype=np.float32)
    x1 = C.input_variable((2, 3))
    model = C.slice(x1, 0, 1, 2)
    verify_one_input(model, data, tmpdir, 'Slice_0')
Example #34
def freq_grid(input, output_dim, slice_size=10, slice_overlap=5):
    # slice the input vector along frequency
    input_dim = input.shape[0]

    right_ind = slice_size
    # array with freq outputs at the prev time step
    m_t_1_k_list = []
    c_t_1_k_list = []

    while right_ind <= input_dim:
        name1 = 'm_t_1_k' + str(right_ind)
        m_t_1_k_list.append(
            C.placeholder(shape=(output_dim),
                          dynamic_axes=input.dynamic_axes,
                          name=name1))
        name1 = 'c_t_1_k' + str(right_ind)
        c_t_1_k_list.append(
            C.placeholder(shape=(output_dim),
                          dynamic_axes=input.dynamic_axes,
                          name=name1))
        right_ind = right_ind + slice_overlap

    left_ind = 0
    right_ind = slice_size
    k_ind = 0
    GLSTM_cell_list = []
    GLSTM_cell = grid_lstm_factory(slice_size, output_dim)
    while right_ind <= input_dim:
        freq_slice = C.slice(input, 0, left_ind, right_ind)
        if k_ind == 0:
            f_x_h_c = GLSTM_cell(m_t_1_k_list[k_ind],
                                 C.Constant(0, (output_dim)), c_t_1_k_list[0],
                                 C.Constant(0, (output_dim)), freq_slice)
        else:
            f_x_h_c = GLSTM_cell(m_t_1_k_list[k_ind],
                                 GLSTM_cell_list[k_ind - 1].outputs[1],
                                 c_t_1_k_list[k_ind],
                                 GLSTM_cell_list[k_ind - 1].outputs[3],
                                 freq_slice)

        GLSTM_cell_list.append(f_x_h_c)

        right_ind = right_ind + slice_overlap
        left_ind = left_ind + slice_overlap
        k_ind = k_ind + 1

    result = C.splice(C.combine([GLSTM_cell_list[0].outputs[0]]),
                      C.combine([GLSTM_cell_list[0].outputs[1]]))
    i = 0
    while i < k_ind:
        replacements = {
            m_t_1_k_list[i]:
            C.sequence.past_value(GLSTM_cell_list[i].outputs[0]).output,
            c_t_1_k_list[i]:
            C.sequence.past_value(GLSTM_cell_list[i].outputs[2]).output
        }
        GLSTM_cell_list[i].replace_placeholders(replacements)
        result = C.splice(result, C.combine([GLSTM_cell_list[i].outputs[0]]),
                          C.combine([GLSTM_cell_list[i].outputs[1]]))
        i = i + 1

    assert (right_ind - slice_overlap) == input_dim

    return result
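
For reference, the window geometry the while-loops walk: slice starts advance by slice_overlap while each slice spans slice_size, so consecutive slices share slice_size - slice_overlap entries. A NumPy sketch with assumed values:

import numpy as np

input_dim, slice_size, slice_overlap = 20, 10, 5
x = np.arange(input_dim, dtype=np.float32)

starts = range(0, input_dim - slice_size + 1, slice_overlap)
freq_slices = [x[left:left + slice_size] for left in starts]  # C.slice(input, 0, l, r)
assert all(s.shape == (slice_size,) for s in freq_slices)
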
Example #37
 def find_embed(x):
     gx, ngx = C.slice(x, 0, 0, self.wg_dim), C.slice(x, 0, self.wg_dim, self.vocab_size)
     return embed(gx, ngx) 
Example #38
def test_op_slice(input_data, slice_params, expected_result, device_id, precision):

    input_data = AA(input_data, dtype=PRECISION_TO_TYPE[precision])
    a = I(
        shape=input_data.shape,
        data_type=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]),
        needs_gradient=True,
        name="a",
    )

    def _ax_slices(x, beg_index, end_index, axis):
        """
        Creates a NumPy slicing array from slice operator's arguments
        """
        ax_slices = []
        for i in range(0, len(x.shape)):
            if i == axis:
                if end_index >= x.shape[i]:
                    ax_slices.append([beg_index])
                else:
                    ax_slices.append([beg_index, end_index])
            else:
                ax_slices.append(slice(None))  # corresponds to ':'
        return ax_slices

    # slice using the overload
    if False:  # FIXME remove once the overloads are in place
        # slice using the operator
        result = C.slice(a, *slice_params)
        ax_slices = _ax_slices(a, *slice_params)
        result = a[ax_slices]

        unittest_helper(
            result,
            None,
            [[expected_result]],
            device_id=device_id,
            precision=precision,
            clean_up=True,
            backward_pass=False,
        )

    # Backward pass test
    # ==================
    # The gradient of the slice operator is a tensor of the same shape as the
    # input tensor, having 1 for elements that were taken and 0 for elements
    # that were dropped.

    def grad_slice(x, beg_index, end_index, axis):
        res = np.zeros_like(x)
        ax_slices = _ax_slices(x, beg_index, end_index, axis)
        res[ax_slices] = x[ax_slices]
        res[res != 0] = 1
        return res

    expected_forward = [AA([expected_result], dtype=PRECISION_TO_TYPE[precision])]
    expected_backward = {"arg": [[grad_slice(np.asarray(input_data), *slice_params)]]}

    _test_unary_op(
        precision,
        device_id,
        C.slice,
        input_data,
        expected_forward,
        expected_backward,
        {"begin_index": slice_params[0], "end_index": slice_params[1], "axis": slice_params[2]},
    )
Example #39
def LSTM(shape,
         _inf,
         cell_shape=None,
         use_peepholes=False,
         init=_default_initializer,
         init_bias=0,
         enable_self_stabilization=False):  # (x, (h, c))
    has_projection = cell_shape is not None
    has_aux = False

    if has_aux:
        UntestedBranchError("LSTM, has_aux option")
    if enable_self_stabilization:
        UntestedBranchError("LSTM, enable_self_stabilization option")

    shape = _as_tuple(shape)

    cell_shape = _as_tuple(cell_shape) if cell_shape is not None else shape

    #stack_axis = -1  #
    stack_axis = 0  # BUGBUG: should be -1, i.e. the fastest-changing one, to match BS
    # determine stacking dimensions
    cell_shape_list = list(cell_shape)
    stacked_dim = cell_shape_list[0]
    cell_shape_list[stack_axis] = stacked_dim * 4
    cell_shape_stacked = tuple(
        cell_shape_list)  # patched dims with stack_axis duplicated 4 times

    # parameters
    b = Parameter(cell_shape_stacked, init=init_bias, name='b')  # a bias
    W = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='W')  # input
    A = Parameter(_inf.shape + cell_shape_stacked, init=init,
                  name='A') if has_aux else None  # aux input (optional)
    H = Parameter(shape + cell_shape_stacked, init=init,
                  name='H')  # hidden-to-hidden
    Ci = Parameter(
        cell_shape, init=init, name='Ci'
    ) if use_peepholes else None  # cell-to-hidden {note: applied elementwise}
    Cf = Parameter(
        cell_shape, init=init, name='Cf'
    ) if use_peepholes else None  # cell-to-hidden {note: applied elementwise}
    Co = Parameter(
        cell_shape, init=init, name='Co'
    ) if use_peepholes else None  # cell-to-hidden {note: applied elementwise}

    Wmr = ParameterTensor(
        cell_shape + shape, init=init, init_value_scale=init_value_scale
    ) if has_projection else None  # final projection

    Sdh = Stabilizer(_inf=_inf.with_shape(
        shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(shape))
    Sdc = Stabilizer(_inf=_inf.with_shape(
        cell_shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(cell_shape))
    Sct = Stabilizer(_inf=_inf.with_shape(
        cell_shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(cell_shape))
    Sht = Stabilizer(_inf=_inf.with_shape(
        shape)) if enable_self_stabilization else Identity(
            _inf=_inf.with_shape(shape))

    def create_hc_placeholder():
        return (Placeholder(_inf=_inf.with_shape(shape), name='hPh'),
                Placeholder(_inf=_inf.with_shape(cell_shape),
                            name='cPh'))  # (h, c)

    # parameters to model function
    x = Placeholder(_inf=_inf, name='lstm_block_arg')
    prev_state = create_hc_placeholder()

    # formula of model function
    dh, dc = prev_state

    dhs = Sdh(dh)  # previous values, stabilized
    dcs = Sdc(dc)
    # note: input does not get a stabilizer here, user is meant to do that outside

    # projected contribution from input(s), hidden, and bias
    proj4 = b + times(x, W) + times(dhs, H) + times(aux, A) if has_aux else \
            b + times(x, W) + times(dhs, H)

    it_proj = slice(proj4, stack_axis, 0 * stacked_dim,
                    1 * stacked_dim)  # split along stack_axis
    bit_proj = slice(proj4, stack_axis, 1 * stacked_dim, 2 * stacked_dim)
    ft_proj = slice(proj4, stack_axis, 2 * stacked_dim, 3 * stacked_dim)
    ot_proj = slice(proj4, stack_axis, 3 * stacked_dim, 4 * stacked_dim)

    # add peephole connection if requested
    def peep(x, c, C):
        return x + C * c if use_peepholes else x

    it = sigmoid(peep(it_proj, dcs, Ci))  # input gate(t)
    bit = it * tanh(bit_proj)  # applied to tanh of input network

    ft = sigmoid(peep(ft_proj, dcs, Cf))  # forget-me-not gate(t)
    bft = ft * dc  # applied to cell(t-1)

    ct = bft + bit  # c(t) is sum of both

    ot = sigmoid(peep(ot_proj, Sct(ct), Co))  # output gate(t)
    ht = ot * tanh(ct)  # applied to tanh(cell(t))

    c = ct  # cell value
    h = times(Sht(ht), Wmr) if has_projection else \
        ht

    _name_node(h, 'h')
    if _trace_layers:
        _log_node(h)  # this looks right
    _name_node(c, 'c')

    # TODO: figure out how to do scoping, and also rename all the apply... to expression
    apply_x_h_c = combine([h, c])
    # return to caller a helper function to create placeholders for recurrence
    apply_x_h_c.create_placeholder = create_hc_placeholder
    _name_and_extend_Function(apply_x_h_c, 'LSTM')
    return apply_x_h_c
Example #40
def center_square(output, block_size, padding):
    return (cntk.slice(cntk.slice(output, 1, padding, block_size - padding), 2,
                       padding, block_size - padding))
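
The nested cntk.slice calls crop axes 1 and 2 down to the interior square. The NumPy equivalent (a sketch, shapes assumed):

import numpy as np

block_size, padding = 8, 2
output = np.random.randn(3, block_size, block_size).astype(np.float32)

center = output[:, padding:block_size - padding, padding:block_size - padding]
assert center.shape == (3, block_size - 2 * padding, block_size - 2 * padding)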