Example no. 1
def test_noise_injection_with_checkpointing():
    from cntk import initializer
    shape = (100,100)
    
    w1 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w2 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    w3 = parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
    
    lr=learning_rate_schedule(0.5, UnitType.sample)
    m=C.momentum_schedule(0.99)

    learner1 = C.momentum_sgd([w1], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner2 = C.momentum_sgd([w2], lr, m, gaussian_noise_injection_std_dev=0.5)
    learner3 = C.momentum_sgd([w3], lr, m, gaussian_noise_injection_std_dev=0.5)

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for i in range(10):
        checkpoint = learner1.create_checkpoint()

        v =  np.float32(np.random.rand(100,100))
    
        learner1.update({w1: v}, 1)
        learner2.update({w2: v}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: v}, 1)
        assert np.allclose(w1.value, w3.value)
Example no. 2
    def __call__(self,
                 num_classes=10,
                 act_type=relu,
                 mdl_conv1a_nf=40,
                 mdl_conv1b_nf=60,
                 mdl_conv2a_nf=50,
                 mdl_conv2b_nf=75,
                 mdl_fc1_nh=75,
                 mdl_drop2a_p=0.033,
                 mdl_drop2b_p=0.097,
                 mdl_drop3_p=0.412,
                 **kwargs):

        input_var = input_variable((1, self.img_h, self.img_w), np.float32)
        label_var = input_variable((self.n_dim), np.float32)

        conv1a = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv1a_nf), activation=act_type, init=glorot_uniform(), pad=True, name='conv1a')(input_var)
        conv1b = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv1b_nf), activation=act_type, init=glorot_uniform(), pad=True, name='conv1b')(conv1a)
        pool1 = MaxPooling(filter_shape=(2, 2), strides=(2, 2), name='pool1')(conv1b)

        conv2a = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv2a_nf), activation=act_type, init=glorot_uniform(), pad=True, name='conv2a')(pool1)
        drop2a = Dropout(prob=mdl_drop2a_p, name="drop2a")(conv2a)
        conv2b = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv2b_nf), activation=act_type, init=glorot_uniform(), pad=True, name='conv2b')(drop2a)
        drop2b = Dropout(prob=mdl_drop2b_p, name="drop2b")(conv2b)
        pool2 = MaxPooling(filter_shape=(2, 2), strides=(2, 2), name='pool2')(drop2b)

        fc1 = Dense(shape=int(mdl_fc1_nh), init=glorot_uniform(), activation=act_type, name='fc1')(pool2)
        drop3 = Dropout(prob=mdl_drop3_p, name="drop3")(fc1)
        #fc2 = Dense(shape=num_classes, init=glorot_uniform(), activation=softmax, name='fc2')(drop3)
        fc2 = Dense(shape=num_classes, init=glorot_uniform(), activation=None, name='fc2')(drop3)

        return input_var, label_var, fc2
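The final Dense layer (fc2) returns raw logits, with the softmax deliberately left out (see the commented-out line above), which suggests the softmax is folded into the training criterion. A minimal wiring sketch, assuming the standard CNTK cross_entropy_with_softmax / classification_error pair and a hypothetical `model` instance of the class this method belongs to:

import cntk as C

input_var, label_var, fc2 = model()  # `model` is a hypothetical instance of the class above
loss = C.cross_entropy_with_softmax(fc2, label_var)
error = C.classification_error(fc2, label_var)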
Example no. 3
def create_basic_model(input, out_dims):

    convolutional_layer_1 = Convolution((5, 5),
                                        16,
                                        init=glorot_uniform(),
                                        activation=relu,
                                        pad=True,
                                        strides=(1, 1))(input)
    pooling_layer_1 = MaxPooling((2, 2), strides=(1, 1))(convolutional_layer_1)

    convolutional_layer_2 = Convolution((5, 5),
                                        16,
                                        init=glorot_uniform(),
                                        activation=relu,
                                        pad=True,
                                        strides=(1, 1))(pooling_layer_1)
    pooling_layer_2 = MaxPooling((3, 3), strides=(2, 2))(convolutional_layer_2)
    #
    convolutional_layer_3 = Convolution((9, 9),
                                        16,
                                        init=glorot_uniform(),
                                        activation=relu,
                                        pad=True,
                                        strides=(1, 1))(pooling_layer_2)
    pooling_layer_3 = MaxPooling((3, 3), strides=(2, 2))(convolutional_layer_3)

    fully_connected_layer = Dense(256, init=glorot_uniform())(pooling_layer_3)
    dropout_layer = Dropout(0.5)(fully_connected_layer)

    output_layer = Dense(out_dims, init=glorot_uniform(),
                         activation=None)(dropout_layer)

    return output_layer
Example no. 5
def create_vgg9_model(input, num_classes):
    with default_options(activation=relu):
        model = Sequential([
            LayerStack(3, lambda i: [
                Convolution((3,3), [64,96,128][i], init=glorot_uniform(), pad=True),
                Convolution((3,3), [64,96,128][i], init=glorot_uniform(), pad=True),
                MaxPooling((3,3), strides=(2,2))
            ]),
            LayerStack(2, lambda : [
                Dense(1024, init=glorot_uniform())
            ]),
            Dense(num_classes, init=glorot_uniform(), activation=None)
        ])

    return model(input)
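For readability, the two LayerStack calls above expand to the explicit layer list below (same widths 64/96/128 and the same default_options(activation=relu) scope); this is just the unrolled equivalent of the factory lambdas:

with default_options(activation=relu):
    model = Sequential([
        Convolution((3,3),  64, init=glorot_uniform(), pad=True),
        Convolution((3,3),  64, init=glorot_uniform(), pad=True),
        MaxPooling((3,3), strides=(2,2)),
        Convolution((3,3),  96, init=glorot_uniform(), pad=True),
        Convolution((3,3),  96, init=glorot_uniform(), pad=True),
        MaxPooling((3,3), strides=(2,2)),
        Convolution((3,3), 128, init=glorot_uniform(), pad=True),
        Convolution((3,3), 128, init=glorot_uniform(), pad=True),
        MaxPooling((3,3), strides=(2,2)),
        Dense(1024, init=glorot_uniform()),
        Dense(1024, init=glorot_uniform()),
        Dense(num_classes, init=glorot_uniform(), activation=None)
    ])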
Example no. 7
def frcn_predictor(features, rois, n_classes, model_path):
    # Load the pretrained classification net and find nodes

    loaded_model = load_model(model_path)
    feature_node = find_by_name(loaded_model, feature_node_name)
    conv_node    = find_by_name(loaded_model, last_conv_node_name)
    pool_node    = find_by_name(loaded_model, pool_node_name)
    last_node    = find_by_name(loaded_model, last_hidden_node_name)

    # Clone the conv layers and the fully connected layers of the network
    conv_layers = combine([conv_node.owner]).clone(CloneMethod.freeze, {feature_node: placeholder()})
    fc_layers = combine([last_node.owner]).clone(CloneMethod.clone, {pool_node: placeholder()})

    # Create the Fast R-CNN model
    feat_norm = features - Constant(114)
    conv_out  = conv_layers(feat_norm)
    roi_out   = roipooling(conv_out, rois, (roi_dim, roi_dim))
    fc_out    = fc_layers(roi_out)

    # z = Dense(rois[0], num_classes, map_rank=1)(fc_out)  # --> map_rank=1 is not yet supported
    W = parameter(shape=(4096, n_classes), init=glorot_uniform())
    b = parameter(shape=n_classes, init=0)
    z = times(fc_out, W) + b

    return z
Example no. 8
def frcn_predictor(features, rois, n_classes, base_path):
    # model specific variables for AlexNet
    model_file = base_path + "/../../../resources/cntk/AlexNet.model"
    roi_dim = 6
    feature_node_name = "features"
    last_conv_node_name = "conv5.y"
    pool_node_name = "pool3"
    last_hidden_node_name = "h2_d"

    # Load the pretrained classification net and find nodes
    print("Loading pre-trained model...")
    loaded_model = load_model(model_file)
    print("Loading pre-trained model... DONE.")
    feature_node = find_by_name(loaded_model, feature_node_name)
    conv_node    = find_by_name(loaded_model, last_conv_node_name)
    pool_node    = find_by_name(loaded_model, pool_node_name)
    last_node    = find_by_name(loaded_model, last_hidden_node_name)

    # Clone the conv layers and the fully connected layers of the network
    conv_layers = combine([conv_node.owner]).clone(CloneMethod.freeze, {feature_node: placeholder()})
    fc_layers   = combine([last_node.owner]).clone(CloneMethod.clone,  {pool_node: placeholder()})

    # Create the Fast R-CNN model
    feat_norm = features - constant(114)
    conv_out  = conv_layers(feat_norm)
    roi_out   = roipooling(conv_out, rois, (roi_dim, roi_dim))
    fc_out    = fc_layers(roi_out)
    #fc_out.set_name("fc_out")

    # z = Dense(rois[0], num_classes, map_rank=1)(fc_out)  # --> map_rank=1 is not yet supported
    W = parameter(shape=(4096, n_classes), init=glorot_uniform())
    b = parameter(shape=n_classes, init=0)
    z = times(fc_out, W) + b
    return z, fc_out
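A hedged usage sketch for the predictor above; the image and ROI shapes are illustrative assumptions (they are not given in the source), and base_path must point at a directory from which the relative AlexNet.model path resolves:

import cntk as C

image = C.input_variable((3, 224, 224), name='features')   # hypothetical AlexNet-sized input
rois  = C.input_variable((100, 4), name='rois')             # e.g. 100 proposals, 4 coordinates each
z, fc_out = frcn_predictor(image, rois, n_classes=21, base_path='.')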
Example no. 9
def linear_layer(input_var, output_dim):
    times_param = parameter(shape=(list(input_var.shape) + [output_dim]),
                            init=glorot_uniform())
    bias_param = parameter(shape=(output_dim), init=0)

    t = times(input_var, times_param)
    return bias_param + t
Example no. 10
def frcn_predictor(features, rois, n_classes, model_path):
    # Load the pretrained classification net and find nodes
    loaded_model = load_model(model_path)
    feature_node = find_by_name(loaded_model, feature_node_name)
    conv_node = find_by_name(loaded_model, last_conv_node_name)
    pool_node = find_by_name(loaded_model, pool_node_name)
    last_node = find_by_name(loaded_model, last_hidden_node_name)

    # Clone the conv layers and the fully connected layers of the network
    conv_layers = combine([conv_node.owner
                           ]).clone(CloneMethod.freeze,
                                    {feature_node: placeholder()})
    fc_layers = combine([last_node.owner]).clone(CloneMethod.clone,
                                                 {pool_node: placeholder()})

    # Create the Fast R-CNN model
    feat_norm = features - Constant(114)
    conv_out = conv_layers(feat_norm)
    roi_out = roipooling(conv_out, rois, C.MAX_POOLING, (roi_dim, roi_dim),
                         0.0625)
    fc_out = fc_layers(roi_out)

    # z = Dense(rois[0], num_classes, map_rank=1)(fc_out)  # --> map_rank=1 is not yet supported
    W = parameter(shape=(4096, n_classes), init=glorot_uniform())
    b = parameter(shape=n_classes, init=0)
    z = times(fc_out, W) + b

    return z
Example no. 11
def LSTM(shape,
         cell_shape=None,
         activation=default_override_or(tanh),
         use_peepholes=default_override_or(False),
         init=default_override_or(glorot_uniform()),
         init_bias=default_override_or(0),
         enable_self_stabilization=default_override_or(False),
         name=''):

    activation = get_default_override(LSTM, activation=activation)
    use_peepholes = get_default_override(LSTM, use_peepholes=use_peepholes)
    init = get_default_override(LSTM, init=init)
    init_bias = get_default_override(LSTM, init_bias=init_bias)
    enable_self_stabilization = get_default_override(
        LSTM, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('LSTM',
                           shape,
                           cell_shape,
                           activation=activation,
                           use_peepholes=use_peepholes,
                           init=init,
                           init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization,
                           name=name)
Example no. 12
def GRU(shape, cell_shape=None, activation=default_override_or(tanh),
        init=default_override_or(glorot_uniform()), init_bias=default_override_or(0),
        enable_self_stabilization=default_override_or(False),
        name=''): # (prev_h, x) -> (h)
    '''
    GRU(shape, cell_shape=None, activation=tanh, init=glorot_uniform(), init_bias=0, enable_self_stabilization=False, name='')

    Layer factory function to create a GRU block for use inside a recurrence.

    Args:
        shape (`int` or `tuple` of `ints`): vector or tensor dimension of the output of this layer
        cell_shape (tuple, defaults to `None`): 
        activation (:class:`~cntk.ops.functions.Function`, defaults to tanh): function to apply at the end, e.g. `relu`
        init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to `glorot_uniform`): initial value of weights `W`
        init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        enable_self_stabilization (bool, defaults to `False`):
        name (str, defaults to ''): the name of the Function instance in the network

    Returns:
        cntk.ops.functions.Function:
        A function (prev_h, input) -> h
    '''

    activation                = get_default_override(GRU, activation=activation)
    init                      = get_default_override(GRU, init=init)
    init_bias                 = get_default_override(GRU, init_bias=init_bias)
    enable_self_stabilization = get_default_override(GRU, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('GRU', shape, cell_shape, activation=activation, use_peepholes=False,
                           init=init, init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization, name=name)
Example no. 13
def RNNUnit(shape,
            cell_shape=None,
            activation=default_override_or(sigmoid),
            init=default_override_or(glorot_uniform()),
            init_bias=default_override_or(0),
            enable_self_stabilization=default_override_or(False),
            name=''):
    '''
    RNNUnit(shape, cell_shape=None, activation=sigmoid, init=glorot_uniform(), init_bias=0, enable_self_stabilization=False, name='')

    This is a deprecated name for :func:`~cntk.layers.blocks.RNNStep`. Use that name instead.
    '''

    activation = get_default_override(RNNUnit, activation=activation)
    init = get_default_override(RNNUnit, init=init)
    init_bias = get_default_override(RNNUnit, init_bias=init_bias)
    enable_self_stabilization = get_default_override(
        RNNUnit, enable_self_stabilization=enable_self_stabilization)

    warnings.warn(
        'This name will be removed in future versions. Please use '
        'RNNStep(...) instead, which is identical except for its name',
        DeprecationWarning)

    return _RecurrentBlock('RNNStep',
                           shape,
                           cell_shape,
                           activation=activation,
                           use_peepholes=False,
                           init=init,
                           init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization,
                           name=name)
Example no. 14
def attention_score(att_dim, init=glorot_uniform(), name=''):
    """
  Compute the attention score,
  where each of the input will be projected to a new dimention space (att_dim) via Wi/Wm
  """
    sim = project_cosine(att_dim, init, name=name + '_sim')
    return sequence.softmax(10 * sim, name=name)
Example no. 15
def resnet_classifer(input, num_classes):
    conv_w_scale = 7.07
    conv_b_value = 0

    fc1_w_scale = 0.4
    fc1_b_value = 0

    sc_value = 1
    bn_time_const = 4096

    kernel_width = 3
    kernel_height = 3

    conv1_w_scale = 0.26
    c_map1 = 16

    conv1 = conv_bn_relu_layer(input, c_map1, kernel_width, kernel_height, 1,
                               1, conv1_w_scale, conv_b_value, sc_value,
                               bn_time_const)
    rn1_1 = resnet_node2(conv1, c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn1_2 = resnet_node2(rn1_1, c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn1_3 = resnet_node2(rn1_2, c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)

    c_map2 = 32
    rn2_1_wProj = get_projection_map(c_map2, c_map1)
    rn2_1 = resnet_node2_inc(rn1_3, c_map2, kernel_width, kernel_height,
                             conv1_w_scale, conv_b_value, sc_value,
                             bn_time_const, rn2_1_wProj)
    rn2_2 = resnet_node2(rn2_1, c_map2, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn2_3 = resnet_node2(rn2_2, c_map2, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)

    c_map3 = 64
    rn3_1_wProj = get_projection_map(c_map3, c_map2)
    rn3_1 = resnet_node2_inc(rn2_3, c_map3, kernel_width, kernel_height,
                             conv1_w_scale, conv_b_value, sc_value,
                             bn_time_const, rn3_1_wProj)
    rn3_2 = resnet_node2(rn3_1, c_map3, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn3_3 = resnet_node2(rn3_2, c_map3, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)

    # Global average pooling
    poolw = 8
    poolh = 8
    poolh_stride = 1
    poolv_stride = 1

    pool = pooling(rn3_3, AVG_POOLING, (1, poolh, poolw),
                   (1, poolv_stride, poolh_stride))
    out_times_params = parameter(shape=(c_map3, 1, 1, num_classes),
                                 init=glorot_uniform())
    out_bias_params = parameter(shape=(num_classes), init=0)
    t = times(pool, out_times_params)
    return t + out_bias_params
Example no. 16
File: nn.py Project: zgsxwsdxg/CNTK
def linear_layer(input_var, output_dim):
    input_dim = input_var.shape[0]
    times_param = parameter(shape=(input_dim, output_dim),
                            init=glorot_uniform())
    bias_param = parameter(shape=(output_dim), init=0)

    t = times(input_var, times_param)
    return bias_param + t
Example no. 17
def WeightDroppedLSTM(shape,
                      dropout_rate,
                      cell_shape=None,
                      activation=default_override_or(tanh),
                      use_peepholes=default_override_or(False),
                      init=default_override_or(glorot_uniform()),
                      init_bias=default_override_or(0),
                      enable_self_stabilization=default_override_or(False),
                      seed=SentinelValueForAutoSelectRandomSeed,
                      name=''):
    '''
    WeightDroppedLSTM(shape, dropout_rate, cell_shape=None, activation=tanh, use_peepholes=False, init=glorot_uniform(), init_bias=0, enable_self_stabilization=False, name='')

    Layer factory function to create an LSTM block for use inside a recurrence.
    The LSTM block implements one step of the recurrence and is stateless. It accepts the previous state as its first two arguments,
    and outputs its new state as a two-valued tuple ``(h,c)``.

    Example:
     >>> # a typical recurrent LSTM layer
     >>> from cntkx.layers import *
     >>> lstm_layer = Recurrence(WeightDroppedLSTM(500))

    Args:
        shape (`int` or `tuple` of `ints`): vector or tensor dimension of the output of this layer
        cell_shape (tuple, defaults to `None`): if given, then the output state is first computed at `cell_shape`
         and linearly projected to `shape`
        activation (:class:`~cntk.ops.functions.Function`, defaults to :func:`~cntk.ops.tanh`): function to apply at the end, e.g. `relu`
        use_peepholes (bool, defaults to `False`):
        init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to `glorot_uniform`): initial value of weights `W`
        init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        enable_self_stabilization (bool, defaults to `False`): if `True` then add a :func:`~cntk.layers.blocks.Stabilizer`
         to all state-related projections (but not the data input)
        name (str, defaults to ''): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`:
        A function ``(prev_h, prev_c, input) -> (h, c)`` that implements one step of a recurrent LSTM layer.
    '''

    activation = get_default_override(WeightDroppedLSTM, activation=activation)
    use_peepholes = get_default_override(WeightDroppedLSTM,
                                         use_peepholes=use_peepholes)
    init = get_default_override(WeightDroppedLSTM, init=init)
    init_bias = get_default_override(WeightDroppedLSTM, init_bias=init_bias)
    enable_self_stabilization = get_default_override(
        WeightDroppedLSTM, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('WeightDroppedLSTM',
                           shape,
                           cell_shape,
                           activation=activation,
                           use_peepholes=use_peepholes,
                           init=init,
                           init_bias=init_bias,
                           dropout_rate=dropout_rate,
                           seed=seed,
                           enable_self_stabilization=enable_self_stabilization,
                           name=name)
Example no. 18
File: nn.py Project: hahatt/CNTK
def linear_layer(input_var, output_dim):
    shape = input_var.shape()

    input_dim = shape[0]
    times_param = parameter(shape=(input_dim, output_dim), init=glorot_uniform())
    bias_param = parameter(shape=(output_dim), init=0)

    t = times(input_var, times_param)
    return bias_param + t
Example no. 19
    def dot_attention(self, inputs, memory, dim):
        '''
        @inputs: [#,c][d] the sequence that needs attention
        @memory (key): [#,q][d] the sequence used to compute the similarity (weights)
        @value: [#,q][d] the sequence used for the weighted sum
        @output: [#,c][d] attention vector
        '''
        input_ph = C.placeholder()
        input_mem = C.placeholder()
        with C.layers.default_options(
                bias=False,
                activation=C.relu):  # all the projections have no bias
            attn_proj_enc = C.layers.Dense(dim,
                                           init=glorot_uniform(),
                                           input_rank=1,
                                           name="Wqu")
            attn_proj_dec = C.layers.Dense(dim,
                                           init=glorot_uniform(),
                                           input_rank=1)

        inputs_ = attn_proj_enc(input_ph)  # [#,c][d]
        memory_ = attn_proj_dec(input_mem)  # [#,q][d]
        unpack_memory, mem_mask = C.sequence.unpack(
            memory_, 0).outputs  # [#][*=q, d], [#][*=q]
        unpack_memory_expand = C.sequence.broadcast_as(unpack_memory,
                                                       inputs_)  # [#,c][*=q,d]

        matrix = C.times_transpose(inputs_, unpack_memory_expand) / (
            dim**0.5)  # [#,c][*=q]
        mem_mask_expand = C.sequence.broadcast_as(mem_mask,
                                                  inputs_)  # [#,c][*=q]
        matrix = C.element_select(mem_mask_expand, matrix,
                                  C.constant(-1e+30))  # [#,c][*=q]
        logits = C.reshape(C.softmax(matrix), (-1, 1))  # [#,c][*=q,1]
        # [#,c][*=q, d]
        memory_expand = C.sequence.broadcast_as(
            C.sequence.unpack(input_mem, 0, no_mask_output=True), input_ph)
        weighted_att = C.reshape(C.reduce_sum(logits * memory_expand, axis=0),
                                 (-1, ))  # [#,c][d]

        return C.as_block(C.combine(weighted_att,
                                    logits), [(input_ph, inputs),
                                              (input_mem, memory)],
                          'dot attention', 'dot attention')
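In compact form, the block above is masked, scaled dot-product attention over a sequence of memory vectors. A rough NumPy restatement of the same arithmetic for one query sequence (masking of padded memory positions is omitted, and the names are illustrative):

import numpy as np

def dot_attention_ref(inputs, memory, Wqu, Wm):
    # inputs: (c, d) query sequence, memory: (q, d) key/value sequence
    q = np.maximum(inputs @ Wqu, 0)              # relu projection of the queries (Dense, bias=False)
    k = np.maximum(memory @ Wm, 0)               # relu projection of the keys
    scores = q @ k.T / np.sqrt(q.shape[-1])      # (c, q) scaled dot products
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)   # softmax over the memory axis
    return weights @ memory                      # (c, d) attention vectors (weighted sum of raw memory)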
Example no. 20
    def LSTMCell(x, y, dh, dc):
        '''LightLSTM Cell'''

        b = C.parameter(shape=(4 * cell_dim), init=0)
        W = C.parameter(shape=(input_dim, 4 * cell_dim), init=glorot_uniform())
        H = C.parameter(shape=(cell_dim, 4 * cell_dim), init=glorot_uniform())

        # projected contribution from input x, hidden, and bias
        proj4 = b + C.times(x, W) + C.times(dh, H)

        it_proj = C.slice(proj4, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj = C.slice(proj4, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj = C.slice(proj4, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj = C.slice(proj4, -1, 3 * cell_dim, 4 * cell_dim)

        it = C.sigmoid(it_proj)  # input gate
        bit = it * C.tanh(bit_proj)

        ft = C.sigmoid(ft_proj)  # forget gate
        bft = ft * dc

        ct = bft + bit
        ot = C.sigmoid(ot_proj)  # output gate
        ht = ot * C.tanh(ct)

        # projected contribution from input y, hidden, and bias
        proj4_2 = b + C.times(y, W) + C.times(ht, H)

        it_proj_2 = C.slice(proj4_2, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj_2 = C.slice(proj4_2, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj_2 = C.slice(proj4_2, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj_2 = C.slice(proj4_2, -1, 3 * cell_dim, 4 * cell_dim)

        it_2 = C.sigmoid(it_proj_2)  # input gate
        bit_2 = it_2 * C.tanh(bit_proj_2)

        ft_2 = C.sigmoid(ft_proj_2)  # forget gate
        bft_2 = ft_2 * ct

        ct2 = bft_2 + bit_2
        ot_2 = C.sigmoid(ot_proj_2)  # output gate
        ht2 = ot_2 * C.tanh(ct2)
        return (ht, ct, ht2, ct2)
Example no. 21
def IndyLSTM(shape,
             activation=default_override_or(tanh),
             init=default_override_or(glorot_uniform()),
             init_bias=default_override_or(0),
             enable_self_stabilization=default_override_or(False),
             name=''):
    """
    Implementation of Independently Recurrent Long Short-term Memory cells: IndyLSTMs by Gonnet and Deselaers.
    Paper can be found at https://arxiv.org/abs/1903.08023

    IndyLSTM cells differ from regular LSTM cells in that the recurrent weights are not modeled as a full matrix,
    but as a diagonal matrix, i.e. the output and state of each LSTM cell depend on the inputs and its
    own output/state, as opposed to the input and the outputs/states of all the cells in the layer.
    The number of parameters per IndyLSTM layer, and thus the number of FLOPs per evaluation, is linear in the
    number of nodes in the layer, as opposed to quadratic for regular LSTM layers, resulting in potentially both
    smaller and faster models.

    Example:
     >>> # a gated recurrent layer
     >>> from cntkx.layers import *
     >>> indy_lstm_layer = Recurrence(IndyLSTM(500))

    Args:
        shape (`int` or `tuple` of `ints`): vector or tensor dimension of the output of this layer
        cell_shape (tuple, defaults to `None`): if given, then the output state is first computed at `cell_shape`
         and linearly projected to `shape`
        activation (:class:`~cntk.ops.functions.Function`, defaults to :func:`~cntk.ops.tanh`): function to apply at the end, e.g. `relu`
        init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to `glorot_uniform`): initial value of weights `W`
        init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        enable_self_stabilization (bool, defaults to `False`): if `True` then add a :func:`~cntk.layers.blocks.Stabilizer`
         to all state-related projections (but not the data input)
        name (str, defaults to ''): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`:
        A function ``(prev_h, prev_c, input) -> (h, c)`` that implements one step of a recurrent IndyLSTM layer.
    """

    activation = get_default_override(IndyLSTM, activation=activation)
    init = get_default_override(IndyLSTM, init=init)
    init_bias = get_default_override(IndyLSTM, init_bias=init_bias)
    enable_self_stabilization = get_default_override(
        IndyLSTM, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('IndyLSTM',
                           shape,
                           None,
                           activation=activation,
                           use_peepholes=False,
                           init=init,
                           init_bias=init_bias,
                           dropout_rate=0,
                           seed=SentinelValueForAutoSelectRandomSeed,
                           enable_self_stabilization=enable_self_stabilization,
                           name=name)
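As a reference for what the 'diagonal recurrent weights' in the docstring mean, here is a rough NumPy sketch of one IndyLSTM step. The gate layout is the usual LSTM one; this is an assumption drawn from the paper, not code taken from _RecurrentBlock:

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def indylstm_step(x, h_prev, c_prev, W, u, b):
    # W: (input_dim, 4*H) dense input weights; u, b: (4*H,) per-unit recurrent weights and biases
    z = x @ W + np.tile(h_prev, 4) * u + b       # the recurrent term is elementwise, not a matmul
    i, f, o, g = np.split(z, 4)
    c = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
    h = sigmoid(o) * np.tanh(c)
    return h, c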
Example no. 22
def resnet_classifer(input, num_classes):
    conv_w_scale = 7.07
    conv_b_value = 0

    fc1_w_scale = 0.4
    fc1_b_value = 0

    sc_value = 1
    bn_time_const = 4096

    kernel_width = 3
    kernel_height = 3

    conv1_w_scale = 0.26
    c_map1 = 16

    conv1 = conv_bn_relu_layer(input, c_map1, kernel_width, kernel_height,
                               1, 1, conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn1_1 = resnet_node2(conv1, c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn1_2 = resnet_node2(rn1_1, c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn1_3 = resnet_node2(rn1_2, c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)

    c_map2 = 32
    rn2_1_wProj = get_projection_map(c_map2, c_map1)
    rn2_1 = resnet_node2_inc(rn1_3, c_map2, kernel_width, kernel_height,
                             conv1_w_scale, conv_b_value, sc_value, bn_time_const, rn2_1_wProj)
    rn2_2 = resnet_node2(rn2_1, c_map2, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn2_3 = resnet_node2(rn2_2, c_map2, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)

    c_map3 = 64
    rn3_1_wProj = get_projection_map(c_map3, c_map2)
    rn3_1 = resnet_node2_inc(rn2_3, c_map3, kernel_width, kernel_height,
                             conv1_w_scale, conv_b_value, sc_value, bn_time_const, rn3_1_wProj)
    rn3_2 = resnet_node2(rn3_1, c_map3, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)
    rn3_3 = resnet_node2(rn3_2, c_map3, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const)

    # Global average pooling
    poolw = 8
    poolh = 8
    poolh_stride = 1
    poolv_stride = 1

    pool = pooling(rn3_3, AVG_POOLING, (1, poolh, poolw), (1, poolv_stride, poolh_stride))
    out_times_params = parameter(shape=(c_map3, 1, 1, num_classes), init=glorot_uniform())
    out_bias_params = parameter(shape=(num_classes), init=0)
    t = times(pool, out_times_params)
    return t + out_bias_params
Example no. 23
def IndRNN(shape, activation=default_override_or(relu),
            init=default_override_or(glorot_uniform()), init_bias=default_override_or(0),
            enable_self_stabilization=default_override_or(False), name=''):
    """
    IndRNN implementation found in "Independently Recurrent Neural Network (IndRNN): Building A Longer and Deeper RNN"
    by Li, et al (https://arxiv.org/abs/1803.04831).

    IndRNNs are RNNs where neurons in each layer are independent from each other, and the cross-channel information is
    obtained through stacking multiple layers.

    It has been shown that an IndRNN can be easily regulated to prevent the gradient exploding and vanishing problems
    while allowing the network to learn long-term dependencies. Moreover, an IndRNN can work with non-saturated
    activation functions such as relu (rectified linear unit) and still be trained robustly.
    Multiple IndRNNs can be stacked to construct a network that is deeper than the existing RNNs.
    Experimental results have shown that the proposed IndRNN is able to process very long
    sequences (over 5000 time steps), can be used to construct very deep networks (21 layers used in the experiment)
    and still be trained robustly. Better performances have been achieved on various tasks by using IndRNNs compared
    with the traditional RNN and LSTM.

    IndRNN also enables the use of the relu activation, which is more efficient to compute than sigmoid and leads to
    faster convergence during training. You may consider initialising the recurrent weights using a uniform
    distribution from 0 to 1.

    The original code is available at: https://github.com/Sunnydreamrain/IndRNN_Theano_Lasagne.

    Example:
     >>> # a plain relu RNN layer
     >>> from cntkx.layers import *
     >>> relu_rnn_layer = Recurrence(IndRNN(500))

    Args:
        shape (`int` or `tuple` of `ints`): vector or tensor dimension of the output of this layer
        activation (:class:`~cntk.ops.functions.Function`, defaults to :func:`~cntk.ops.relu`): function to apply at the end, e.g. `relu`
        init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to `glorot_uniform`): initial value of weights `W`
        init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        enable_self_stabilization (bool, defaults to `False`): if `True` then add a :func:`~cntk.layers.blocks.Stabilizer`
         to all state-related projections (but not the data input)
        name (str, defaults to ''): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`:
        A function ``(prev_h, input) -> h`` where ``h = activation(input @ W + prev_h * R + b)``
    """

    activation                = get_default_override(IndRNN, activation=activation)
    init                      = get_default_override(IndRNN, init=init)
    init_bias                 = get_default_override(IndRNN, init_bias=init_bias)
    enable_self_stabilization = get_default_override(IndRNN, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('IndRNN', shape, None, activation=activation, use_peepholes=False,
                           init=init, init_bias=init_bias, dropout_rate=0, seed=SentinelValueForAutoSelectRandomSeed,
                           enable_self_stabilization=enable_self_stabilization, name=name)
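The return description above, h = activation(input @ W + prev_h * R + b), is simple enough to write out directly. A minimal NumPy sketch (names are illustrative; R is a vector, so each unit only sees its own previous output):

import numpy as np

def indrnn_step(x, h_prev, W, r, b, activation=lambda v: np.maximum(v, 0)):
    # elementwise recurrence: r has one weight per hidden unit
    return activation(x @ W + h_prev * r + b)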
Example no. 24
def linear_layer(input_var, output_dim):
    try:
        shape = input_var.shape()
    except AttributeError:
        input_var = input_var.output()
        shape = input_var.shape()

    input_dim = shape[0]
    times_param = parameter(shape=(input_dim, output_dim), init=glorot_uniform())
    bias_param = parameter(shape=(output_dim), init=0)

    t = times(input_var, times_param)
    return bias_param + t
Example no. 25
def project_cosine_sim(att_dim, init=glorot_uniform(), name=''):
    """
  Compute the project cosine similarity of two input sequences, where each of the input will be projected to a new dimention space (att_dim) via Wi/Wm
  """
    Wi = Parameter(_INFERRED + tuple((att_dim, )), init=init, name='Wi')
    Wm = Parameter(_INFERRED + tuple((att_dim, )), init=init, name='Wm')
    status = placeholder_variable(name='status')
    memory = placeholder_variable(name='memory')
    projected_status = times(status, Wi, name='projected_status')
    projected_memory = times(memory, Wm, name='projected_memory')
    sim = cosine_similarity(projected_status,
                            projected_memory,
                            name=name + '_sim')
    return seq_softmax(sim, name=name)
Example no. 26
def GRU(shape,
        cell_shape=None,
        activation=default_override_or(tanh),
        init=default_override_or(glorot_uniform()),
        init_bias=default_override_or(0),
        enable_self_stabilization=default_override_or(False),
        name=''):
    '''
    GRU(shape, cell_shape=None, activation=tanh, init=glorot_uniform(), init_bias=0, enable_self_stabilization=False, name='')

    Layer factory function to create a GRU block for use inside a recurrence.
    The GRU block implements one step of the recurrence and is stateless. It accepts the previous state as its first argument,
    and outputs its new state.

    Example:
     >>> # a gated recurrent layer
     >>> from cntk.layers import *
     >>> gru_layer = Recurrence(GRU(500))

    Args:
        shape (`int` or `tuple` of `ints`): vector or tensor dimension of the output of this layer
        cell_shape (tuple, defaults to `None`): if given, then the output state is first computed at `cell_shape`
         and linearly projected to `shape`
        activation (:class:`~cntk.ops.functions.Function`, defaults to :func:`~cntk.ops.tanh`): function to apply at the end, e.g. `relu`
        init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to `glorot_uniform`): initial value of weights `W`
        init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        enable_self_stabilization (bool, defaults to `False`): if `True` then add a :func:`~cntk.layers.blocks.Stabilizer`
         to all state-related projections (but not the data input)
        name (str, defaults to ''): the name of the Function instance in the network

    Returns:
        cntk.ops.functions.Function:
        A function ``(prev_h, input) -> h`` that implements one step of a recurrent GRU layer.
    '''

    activation = get_default_override(GRU, activation=activation)
    init = get_default_override(GRU, init=init)
    init_bias = get_default_override(GRU, init_bias=init_bias)
    enable_self_stabilization = get_default_override(
        GRU, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('GRU',
                           shape,
                           cell_shape,
                           activation=activation,
                           use_peepholes=False,
                           init=init,
                           init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization,
                           name=name)
Example no. 27
def project_cosine(project_dim, init = glorot_uniform(), name=''):
  """
  Compute the project cosine similarity of two input sequences, 
  where each of the input will be projected to a new dimention space (project_dim) via Wi/Wm
  """
  Wi = Parameter(_INFERRED + (project_dim,), init = init, name='Wi')
  Wm = Parameter(_INFERRED + (project_dim,), init = init, name='Wm')

  status = placeholder(name='status')
  memory = placeholder(name='memory')

  projected_status = times(status, Wi, name = 'projected_status')   
  projected_memory = times(memory, Wm, name = 'projected_memory')
  status_br = sequence.broadcast_as(projected_status, projected_memory, name='status_broadcast')
  sim = cosine_distance(status_br, projected_memory, name= name)
  return sim
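A quick NumPy rendition of the projected cosine similarity computed above, written per memory position (the sequence broadcasting that CNTK handles is elided; names are illustrative):

import numpy as np

def project_cosine_ref(status, memory, Wi, Wm, eps=1e-8):
    ps = status @ Wi                                  # project the status vector to project_dim
    pm = memory @ Wm                                  # project each memory vector to project_dim
    num = pm @ ps                                     # dot product with every memory row
    den = np.linalg.norm(pm, axis=-1) * np.linalg.norm(ps) + eps
    return num / den                                  # cosine similarity per memory position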
Example no. 28
def GRU(shape, cell_shape=None, activation=default_override_or(tanh),
        init=default_override_or(glorot_uniform()), init_bias=default_override_or(0),
        enable_self_stabilization=default_override_or(False),
        name=''): # (prev_h, x) -> (h)
    '''
    Layer factory function to create a GRU block for use inside a recurrence.
    Returns a function (prev_h, input) -> h.
    '''

    activation                = get_default_override(GRU, activation=activation)
    init                      = get_default_override(GRU, init=init)
    init_bias                 = get_default_override(GRU, init_bias=init_bias)
    enable_self_stabilization = get_default_override(GRU, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('GRU', shape, cell_shape, activation=activation, use_peepholes=False,
                           init=init, init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization, name=name)
Example no. 29
def conv_bn_layer(input, out_feature_map_count, kernel_width, kernel_height, h_stride, v_stride, w_scale, b_value, sc_value, bn_time_const):
    try:
        shape = input.shape()
    except AttributeError:
        input_var = input.output()
        shape = input_var.shape()
    num_in_channels = shape[0]
    #TODO: use RandomNormal to initialize, needs to be exposed in the python api
    conv_params = parameter(shape=(num_in_channels, kernel_height,
        kernel_width, out_feature_map_count), init=glorot_uniform(output_rank=-1, filter_rank=2))
    conv_func = convolution(conv_params, input, (num_in_channels, v_stride, h_stride))

    #TODO: initialize using b_value and sc_value, needs to be exposed in the python api
    bias_params = parameter(shape=(out_feature_map_count), init=b_value)
    scale_params = parameter(shape=(out_feature_map_count), init=sc_value)
    running_mean = constant((out_feature_map_count), 0.0)
    running_invstd = constant((out_feature_map_count), 0.0)
    return batch_normalization(conv_func, scale_params, bias_params, running_mean, running_invstd, True, bn_time_const, 0.0, 0.000000001)
Example no. 30
def RNNUnit(shape, cell_shape=None, activation=default_override_or(sigmoid),
            init=default_override_or(glorot_uniform()), init_bias=default_override_or(0),
            enable_self_stabilization=default_override_or(False),
            name=''): # (prev_h, x) -> (h)
    '''
    Layer factory function to create a plain RNN block for use inside a recurrence.
    Returns a function (prev_h, input) -> h, where
     h = activation(W * input + R * prev_h + b)
    '''

    activation                = get_default_override(RNNUnit, activation=activation)
    init                      = get_default_override(RNNUnit, init=init)
    init_bias                 = get_default_override(RNNUnit, init_bias=init_bias)
    enable_self_stabilization = get_default_override(RNNUnit, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('RNNUnit', shape, cell_shape, activation=activation, use_peepholes=False,
                           init=init, init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization, name=name)
Example no. 31
def LSTM(shape, cell_shape=None, activation=default_override_or(tanh), use_peepholes=default_override_or(False),
         init=default_override_or(glorot_uniform()), init_bias=default_override_or(0),
         enable_self_stabilization=default_override_or(False),
         name=''):
    '''
    Layer factory function to create an LSTM block for use inside a recurrence.
    Returns a function (prev_h, prev_c, input) -> (h, c).
    '''

    activation                = get_default_override(LSTM, activation=activation)
    use_peepholes             = get_default_override(LSTM, use_peepholes=use_peepholes)
    init                      = get_default_override(LSTM, init=init)
    init_bias                 = get_default_override(LSTM, init_bias=init_bias)
    enable_self_stabilization = get_default_override(LSTM, enable_self_stabilization=enable_self_stabilization)

    return _RecurrentBlock('LSTM', shape, cell_shape, activation=activation, use_peepholes=use_peepholes,
                           init=init, init_bias=init_bias,
                           enable_self_stabilization=enable_self_stabilization, name=name)
Example no. 32
def gru_cell(shape, init=glorot_uniform(), name=''):  # (x, (h,c))
    """ GRU cell function
  """
    shape = _as_tuple(shape)

    if len(shape) != 1:
        raise ValueError("gru_cell: shape must be vectors (rank-1 tensors)")

    # determine stacking dimensions
    cell_shape_stacked = shape * 2  # patched dims with stack_axis duplicated 2 times

    # parameters
    Wz = Parameter(cell_shape_stacked, init=init, name='Wz')
    Wr = Parameter(cell_shape_stacked, init=init, name='Wr')
    Wh = Parameter(cell_shape_stacked, init=init, name='Wh')
    Uz = Parameter(_INFERRED + shape, init=init, name='Uz')
    Ur = Parameter(_INFERRED + shape, init=init, name='Ur')
    Uh = Parameter(_INFERRED + shape, init=init, name='Uh')

    def create_s_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        return Placeholder(shape=shape, name='S')  # (h, c)

    # parameters to model function
    x = Placeholder(name='gru_block_arg')
    prev_status = create_s_placeholder()

    # formula of model function
    Sn_1 = prev_status

    z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'),
                name='z')
    r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'),
                name='r')
    h = tanh(times(x, Uh, name='x*Uh') +
             times(element_times(Sn_1, r, name='Sprev*r'), Wh),
             name='h')
    s = plus(element_times((1 - z), h, name='(1-z)*h'),
             element_times(z, Sn_1, name='z*SPrev'),
             name=name)
    apply_x_s = combine([s])
    apply_x_s.create_placeholder = create_s_placeholder
    return apply_x_s
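The update implemented above is the standard GRU. A compact NumPy restatement of the same four formula lines, with names mirroring the parameters above:

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def gru_cell_ref(x, s_prev, Uz, Ur, Uh, Wz, Wr, Wh):
    z = sigmoid(x @ Uz + s_prev @ Wz)          # update gate
    r = sigmoid(x @ Ur + s_prev @ Wr)          # reset gate
    h = np.tanh(x @ Uh + (s_prev * r) @ Wh)    # candidate state
    return (1 - z) * h + z * s_prev            # new state s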
Example no. 33
def create_shallow_model(input, out_dims):
    
    convolutional_layer_1_1  = Convolution((7,7), 32, init=glorot_uniform(), activation=relu, pad=True, strides=(1,1))(input)
    convolutional_layer_1_2  = Convolution((25,25), 32, init=glorot_uniform(), activation=relu, pad=True, strides=(1,1))(convolutional_layer_1_1)

    pooling_layer_1  = MaxPooling((25,25), strides=(5,5))(convolutional_layer_1_2 )
    
    convolutional_layer_2_1 = Convolution((3,3), 32, init=glorot_uniform(), activation=relu, pad=True, strides=(1,1))(pooling_layer_1)
    pooling_layer_2 = MaxPooling((2,2), strides=(2,2))(convolutional_layer_2_1)
 
    fully_connected_layer_1  = Dense(512, init=glorot_uniform())(pooling_layer_2)   
    fully_connected_layer_2  = Dense(128, init=glorot_uniform())(fully_connected_layer_1)
    dropout_layer = Dropout(0.5)(fully_connected_layer_2)

    output_layer = Dense(out_dims, init=glorot_uniform(), activation=None)(dropout_layer)
    
    return output_layer
Example no. 34
def IndRNNStep(shape,
               cell_shape=None,
               activation=default_override_or(relu),
               init=default_override_or(glorot_uniform()),
               init_bias=default_override_or(0),
               enable_self_stabilization=default_override_or(False),
               name=''):

    activation = get_default_override(RNNStep, activation=activation)
    init = get_default_override(RNNStep, init=init)
    init_bias = get_default_override(RNNStep, init_bias=init_bias)
    enable_self_stabilization = get_default_override(
        RNNStep, enable_self_stabilization=enable_self_stabilization)

    return IndRNNBlock('RNNStep',
                       shape,
                       cell_shape,
                       activation=activation,
                       use_peepholes=False,
                       init=init,
                       init_bias=init_bias,
                       enable_self_stabilization=enable_self_stabilization,
                       name=name)
Example no. 35
def create_model_ext(input, ext_values, out_dims):

    # in VGG style
    #https://www.cs.toronto.edu/~frossard/post/vgg16/
    convolutional_layer_1_1 = Convolution((3, 3),
                                          16,
                                          init=glorot_uniform(),
                                          activation=relu,
                                          pad=True,
                                          strides=(1, 1))(input)
    convolutional_layer_1_2 = Convolution(
        (5, 5),
        32,
        init=glorot_uniform(),
        activation=relu,
        pad=True,
        strides=(1, 1))(convolutional_layer_1_1)
    pooling_layer_1 = MaxPooling((2, 2),
                                 strides=(2, 2))(convolutional_layer_1_2)

    convolutional_layer_2_1 = Convolution((3, 3),
                                          32,
                                          init=glorot_uniform(),
                                          activation=relu,
                                          pad=True,
                                          strides=(1, 1))(pooling_layer_1)
    convolutional_layer_2_2 = Convolution(
        (7, 7),
        64,
        init=glorot_uniform(),
        activation=relu,
        pad=True,
        strides=(1, 1))(convolutional_layer_2_1)
    pooling_layer_2 = MaxPooling((2, 2),
                                 strides=(1, 1))(convolutional_layer_2_2)

    convolutional_layer_3_1 = Convolution((3, 3),
                                          64,
                                          init=glorot_uniform(),
                                          activation=relu,
                                          pad=True,
                                          strides=(1, 1))(pooling_layer_2)
    convolutional_layer_3_2 = Convolution(
        (7, 7),
        96,
        init=glorot_uniform(),
        activation=relu,
        pad=True,
        strides=(1, 1))(convolutional_layer_3_1)
    pooling_layer_3 = MaxPooling((2, 2),
                                 strides=(1, 1))(convolutional_layer_3_2)

    convolutional_layer_4_1 = Convolution((3, 3),
                                          96,
                                          init=glorot_uniform(),
                                          activation=relu,
                                          pad=True,
                                          strides=(1, 1))(pooling_layer_3)
    pooling_layer_4 = MaxPooling((2, 2),
                                 strides=(1, 1))(convolutional_layer_4_1)

    ##
    fully_connected_layer_1 = Dense(512,
                                    init=glorot_uniform())(pooling_layer_4)
    dropout_layer_1 = Dropout(0.5)(fully_connected_layer_1)

    fully_connected_with_extra_values = splice(dropout_layer_1,
                                               ext_values,
                                               axis=0)

    fully_connected_layer_2 = Dense(
        256, init=glorot_uniform())(fully_connected_with_extra_values)
    fully_connected_layer_3 = Dense(
        128, init=glorot_uniform())(fully_connected_layer_2)
    dropout_layer_2 = Dropout(0.5)(fully_connected_layer_3)

    output_layer = Dense(out_dims, init=glorot_uniform(),
                         activation=None)(dropout_layer_2)

    return output_layer
Example no. 36
def linear_layer(input_var, output_dim):
    times_param = parameter(shape=(list(input_var.shape) + [output_dim]), init=glorot_uniform())
    bias_param = parameter(shape=(output_dim), init=0)

    t = times(input_var, times_param)
    return bias_param + t
Example no. 37
def embedding(input, embedding_dim):
    input_dim = input.shape[0]

    embedding_parameters = parameter(shape=(input_dim, embedding_dim), init=glorot_uniform())
    return times(input, embedding_parameters)
Example no. 38
_trace_layers = False
#_trace_layers = True  # uncomment this to log creation of graph through layers

_INFERRED = (InferredDimension,)  # as a tuple, makes life easier

# call this for all untested branches
def UntestedBranchError(name):
    raise NotImplementedError("Untested code branch: " + name)

# This record contains the defaults for a number of optional parameters to layers.
# These can be overwritten temporarily by saying
#    with default_options(init=..., ...):
#        # code block within which the changed defaults are active
_current_default_options = Record(
    init=glorot_uniform(),
    activation=None,                  # Dense() and Convolution() have no activation by default
    pad=False, # BUGBUG: not done for pooling at present. Need a special default? How to name?
    # ^^ This should be addressed by allowing configs per layer type.
    #    To be addressed as a per-layer default. See default_options below.
    bias=True,
    init_bias=0,
    enable_self_stabilization=False,  # Stabilizer() and LSTM()
    initial_state=None,               # Recurrence()
    use_peepholes=False               # LSTM()
)

_default_sentinel           = '(default)'           # This is a singleton sentinel value we recognize and replace in _initializer_for()
_default_sentinel_init      = '(init default)'      # use different ones for init and init_bias so we can distinguish them in _initializer_for()
_default_sentinel_init_bias = '(init_bias default)'
# in function signatures we use symbols that indicate the default default in their name
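The record above backs the default_options scoping mechanism described in the comment: values set in a with-block temporarily replace these defaults for the layers created inside it. A short usage sketch (the overridden values are arbitrary examples):

with default_options(init=glorot_uniform(), pad=True):
    model = Sequential([
        Convolution((3, 3), 64),        # picks up init and pad from the enclosing scope
        Dense(10, activation=None)
    ])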
Example no. 39
def LSTMP_cell_with_self_stabilization(input, prev_output, prev_cell_state):
    input_dim = input.shape[0]
    output_dim = prev_output.shape[0]
    cell_dim = prev_cell_state.shape[0]

    Wxo = parameter(shape=(input_dim, cell_dim), init=glorot_uniform())
    Wxi = parameter(shape=(input_dim, cell_dim), init=glorot_uniform())
    Wxf = parameter(shape=(input_dim, cell_dim), init=glorot_uniform())
    Wxc = parameter(shape=(input_dim, cell_dim), init=glorot_uniform())

    Bo = parameter(shape=(cell_dim), init=0)
    Bc = parameter(shape=(cell_dim), init=0)
    Bi = parameter(shape=(cell_dim), init=0)
    Bf = parameter(shape=(cell_dim), init=0)

    Whi = parameter(shape=(output_dim, cell_dim), init=glorot_uniform())
    Wci = parameter(shape=(cell_dim), init=glorot_uniform())

    Whf = parameter(shape=(output_dim, cell_dim), init=glorot_uniform())
    Wcf = parameter(shape=(cell_dim), init=glorot_uniform())

    Who = parameter(shape=(output_dim, cell_dim), init=glorot_uniform())
    Wco = parameter(shape=(cell_dim), init=glorot_uniform())

    Whc = parameter(shape=(output_dim, cell_dim), init=glorot_uniform())

    Wmr = parameter(shape=(cell_dim, output_dim), init=glorot_uniform())

    # Stabilization by routing input through an extra scalar parameter
    sWxo = parameter(init=0)
    sWxi = parameter(init=0)
    sWxf = parameter(init=0)
    sWxc = parameter(init=0)

    sWhi = parameter(init=0)
    sWci = parameter(init=0)

    sWhf = parameter(init=0)
    sWcf = parameter(init=0)
    sWho = parameter(init=0)
    sWco = parameter(init=0)
    sWhc = parameter(init=0)

    sWmr = parameter(init=0)

    expsWxo = exp(sWxo)
    expsWxi = exp(sWxi)
    expsWxf = exp(sWxf)
    expsWxc = exp(sWxc)

    expsWhi = exp(sWhi)
    expsWci = exp(sWci)

    expsWhf = exp(sWhf)
    expsWcf = exp(sWcf)
    expsWho = exp(sWho)
    expsWco = exp(sWco)
    expsWhc = exp(sWhc)

    expsWmr = exp(sWmr)

    Wxix = times(element_times(expsWxi, input), Wxi)
    Whidh = times(element_times(expsWhi, prev_output), Whi)
    Wcidc = element_times(Wci, element_times(expsWci, prev_cell_state))

    it = sigmoid(Wxix + Bi + Whidh + Wcidc)
    Wxcx = times(element_times(expsWxc, input), Wxc)
    Whcdh = times(element_times(expsWhc, prev_output), Whc)
    bit = element_times(it, tanh(Wxcx + Whcdh + Bc))
    Wxfx = times(element_times(expsWxf, input), Wxf)
    Whfdh = times(element_times(expsWhf, prev_output), Whf)
    Wcfdc = element_times(Wcf, element_times(expsWcf, prev_cell_state))

    ft = sigmoid(Wxfx + Bf + Whfdh + Wcfdc)
    bft = element_times(ft, prev_cell_state)

    ct = bft + bit

    Wxox = times(element_times(expsWxo, input), Wxo)
    Whodh = times(element_times(expsWho, prev_output), Who)
    Wcoct = element_times(Wco, element_times(expsWco, ct))

    ot = sigmoid(Wxox + Bo + Whodh + Wcoct)

    mt = element_times(ot, tanh(ct))
    return (times(element_times(expsWmr, mt), Wmr), ct)
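The self-stabilization pattern used throughout the cell above is just a learned positive scale on each input path: a scalar parameter s initialized to 0 is passed through exp, so the scale starts at 1.0 and stays strictly positive. A tiny sketch of the idea, assuming the same exp-of-scalar parameterization:

import numpy as np

def stabilize(x, s=0.0):
    # s is a learned scalar (initialized to 0); exp(s) starts at 1.0 and is always positive
    return np.exp(s) * x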
Example no. 40
File: nn.py Project: hahatt/CNTK
def conv_bn_layer(input, out_feature_map_count, kernel_width, kernel_height, h_stride, v_stride, w_scale, b_value, sc_value, bn_time_const):
    shape = input.shape()
    num_in_channels = shape[0]
    #TODO: use RandomNormal to initialize, needs to be exposed in the python api
    conv_params = parameter(shape=(out_feature_map_count, num_in_channels, kernel_height, kernel_width), init=glorot_uniform(output_rank=-1, filter_rank=2))
    conv_func = convolution(conv_params, input, (num_in_channels, v_stride, h_stride))

    #TODO: initialize using b_value and sc_value, needs to be exposed in the python api
    bias_params = parameter(shape=(out_feature_map_count), init=b_value)
    scale_params = parameter(shape=(out_feature_map_count), init=sc_value)
    running_mean = constant((out_feature_map_count), 0.0)
    running_invstd = constant((out_feature_map_count), 0.0)
    return batch_normalization(conv_func, scale_params, bias_params, running_mean, running_invstd, True, bn_time_const, 0.0, 0.000000001)