Example #1
def contractive_reward(labels, predictions_and_stop_probabilities):
    """
  Compute the contractive reward loss in paper 'ReasoNet: Learning to Stop Reading in Machine Comprehension'
  Args:
    labels: The lables
    predictions_and_stop_probabilities: A list of tuples, each tuple contains the prediction and stop probability of the coresponding step.
  """
    base = None
    for pred, stop in predictions_and_stop_probabilities:
        if base is None:
            base = ops.element_times(pred, stop)
        else:
            base = ops.plus(ops.element_times(pred, stop), base)
    avg_rewards = ops.stop_gradient(sequence.reduce_sum(base * labels))
    base_reward = sequence.broadcast_as(avg_rewards, base, name='base_line')
    # While the learner will minimize the loss by default, we want it to
    # maximize the rewards: maximum rewards => minimal -rewards,
    # so we use (1 - r/b) as the rewards instead of (r/b - 1).
    step_cr = ops.stop_gradient(1 - ops.element_divide(labels, base_reward))
    normalized_contractive_rewards = ops.element_times(base, step_cr)
    rewards = sequence.reduce_sum(normalized_contractive_rewards) + avg_rewards
    return rewards
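A minimal usage sketch, assuming CNTK 2.x with ops and sequence imported as in the snippet; the per-step outputs below are hypothetical stand-ins for ReasoNet's answer predictions and termination-gate probabilities:

from cntk import ops
from cntk.ops import sequence

# pred_t / stop_t would come from the reader's inference steps (not shown here):
# step_outputs = [(pred_1, stop_1), (pred_2, stop_2), (pred_3, stop_3)]
# loss = contractive_reward(answer_labels, step_outputs)
# The returned node is passed to the Trainer as the criterion; per the sign
# convention in the comments above, minimizing it maximizes the reward.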
Example #2
def fully_connected_layer(input, output_dim, device_id, nonlinearity):
    # Note: device_id is accepted for API symmetry but not used in the body.
    input_dim = input.shape()[0]
    times_param = parameter(shape=(input_dim, output_dim))
    t = times(input, times_param)
    plus_param = parameter(shape=(output_dim,))
    p = plus(plus_param, t.output())
    return nonlinearity(p.output())
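Hypothetical usage, assuming the legacy CNTK Python API this snippet is written against (where input.shape() and .output() are methods rather than the properties used in CNTK 2.x); the feature variable and nonlinearity below are illustrative stand-ins:

# features = input_variable(shape=(784,))   # hypothetical input node
# fc = fully_connected_layer(features, 128, device_id=-1, nonlinearity=sigmoid)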
Example #3
def fully_connected_classifier_net(input, num_output_classes, hidden_layer_dim, num_hidden_layers, device, nonlinearity):
    classifier_root = fully_connected_layer(input, hidden_layer_dim, device, nonlinearity)
    for i in range(1, num_hidden_layers):
        classifier_root = fully_connected_layer(classifier_root.output(), hidden_layer_dim, device, nonlinearity)

    output_times_param = parameter(shape=(hidden_layer_dim, num_output_classes))
    output_plus_param = parameter(shape=(num_output_classes,))
    t = times(classifier_root.output(), output_times_param)
    classifier_root = plus(output_plus_param, t.output())
    return classifier_root
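A sketch of wiring the classifier net, under the same legacy-API assumption; all names are illustrative:

# features = input_variable(shape=(input_dim,))
# net = fully_connected_classifier_net(features, num_output_classes=10,
#                                      hidden_layer_dim=200, num_hidden_layers=2,
#                                      device=-1, nonlinearity=sigmoid)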
Example #4
    def plus(cntk_layer, inputs):
        '''
        Set up a plus op with the given parameters.

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of the plus op
            inputs (list): a list containing all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`

        Return:
            :func:`~cntk.ops.functions.Function`: the instantiated CNTK plus op
        '''
        sanitize_left = ops.sanitize_input(inputs[0])
        sanitize_right = ops.sanitize_input(inputs[1])
        return ops.plus(sanitize_left, sanitize_right, name=cntk_layer.op_name)
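This helper is a method of the Caffe-to-CNTK converter's op-setup class; a hedged sketch of how it might be invoked (the cntk_layer object normally comes from the crosstalkcaffe loader):

# left, right: cntk.ops.functions.Function nodes built for the two incoming
# Caffe blobs; cntk_layer.op_name supplies the name of the resulting node.
# out = plus(cntk_layer, [left, right])   # -> Function computing left + right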
Example #5
def resnet_basic(layer_input, filter_size, num_filters, strides, prefix):
    """
    Returns a resnet basic building block
    """
    c1 = conv_bn_relu(layer_input,
                      filter_size,
                      num_filters,
                      strides,
                      name='{}_1'.format(prefix))
    c2 = conv_bn(c1,
                 filter_size,
                 num_filters,
                 strides,
                 name='{}_2'.format(prefix))
    p = plus(c2, layer_input, name='{}_res'.format(prefix))
    return relu(p, name='{}_relu'.format(prefix))
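A hedged sketch of stacking the block; conv_bn_relu, conv_bn, plus and relu are the CNTK-based helpers defined elsewhere in the same model file, and the input is illustrative. Because the shortcut is an identity plus, the block assumes strides of 1 and an unchanged channel count (see resnet_basic_inc below for the projection variant):

# x = input_variable((16, 32, 32))          # a 16-channel feature map
# r = resnet_basic(x, filter_size=(3, 3), num_filters=16,
#                  strides=(1, 1), prefix='res1a')
# r = resnet_basic(r, (3, 3), 16, (1, 1), prefix='res1b')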
Example #6
def gru_cell(shape, init=glorot_uniform(), name=''):  # (x, h) -> h
    """
    GRU cell function.
    """
    shape = _as_tuple(shape)

    if len(shape) != 1:
        raise ValueError("gru_cell: shape must be a vector (rank-1 tensor)")

    # determine stacking dimensions
    cell_shape_stacked = shape * 2  # patched dims with stack_axis duplicated 2 times

    # parameters
    Wz = Parameter(cell_shape_stacked, init=init, name='Wz')
    Wr = Parameter(cell_shape_stacked, init=init, name='Wr')
    Wh = Parameter(cell_shape_stacked, init=init, name='Wh')
    Uz = Parameter(_INFERRED + shape, init=init, name='Uz')
    Ur = Parameter(_INFERRED + shape, init=init, name='Ur')
    Uh = Parameter(_INFERRED + shape, init=init, name='Uh')

    def create_s_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        return Placeholder(shape=shape, name='S')  # the single GRU state h

    # parameters to model function
    x = Placeholder(name='gru_block_arg')
    prev_status = create_s_placeholder()

    # formula of model function
    Sn_1 = prev_status

    z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'),
                name='z')
    r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'),
                name='r')
    h = tanh(times(x, Uh, name='x*Uh') +
             times(element_times(Sn_1, r, name='Sprev*r'), Wh),
             name='h')
    s = plus(element_times((1 - z), h, name='(1-z)*h'),
             element_times(z, Sn_1, name='z*SPrev'),
             name=name)
    apply_x_s = combine([s])
    apply_x_s.create_placeholder = create_s_placeholder
    return apply_x_s
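The returned Function exposes two placeholders, the input x and the previous state S; forming the actual recurrence is left to the caller. A sketch of the intended wiring, stated as an assumption rather than the original driver code:

# cell = gru_cell(shape=128, name='s')
# Unfold the cell over a sequence by substituting the S placeholder with
# past_value of the cell's own output and the x placeholder with the sequence
# input, e.g. via cell.replace_placeholders({...}).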
Example #7
def resnet_basic_inc(layer_input, filter_size, num_filters, strides, prefix):
    """
    Returns a ResNet basic bulding block with projection
    Use when there is a change in layer_input/output channels
    """
    ones = np.ones_like(strides)
    c1 = conv_bn_relu(layer_input,
                      filter_size,
                      num_filters,
                      strides,
                      name='{}_1'.format(prefix))
    c2 = conv_bn(c1,
                 filter_size,
                 num_filters,
                 ones,
                 name='{}_2'.format(prefix))
    s = conv_bn(layer_input,
                ones,
                num_filters,
                strides,
                name='{}_3'.format(prefix))
    p = plus(c2, s, name='{}_res'.format(prefix))
    return relu(p, name='{}_relu'.format(prefix))
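Illustrative use at a stage boundary, where the channel count doubles and the spatial size halves; names are stand-ins. The strided 1x1 conv_bn on the shortcut (filter size ones) projects layer_input to the new shape so that the plus type-checks:

# s2 = resnet_basic_inc(s1, filter_size=(3, 3), num_filters=32,
#                       strides=(2, 2), prefix='res2a')
# s2 = resnet_basic(s2, (3, 3), 32, (1, 1), prefix='res2b')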
Example #8
def resnet_classifer(input, num_classes, device, output_name):
    conv_w_scale = 7.07
    conv_b_value = 0

    fc1_w_scale = 0.4
    fc1_b_value = 0

    sc_value = 1
    bn_time_const = 4096

    kernel_width = 3
    kernel_height = 3

    conv1_w_scale = 0.26
    c_map1 = 16

    conv1 = conv_bn_relu_layer(input, c_map1, kernel_width, kernel_height, 1,
                               1, conv1_w_scale, conv_b_value, sc_value,
                               bn_time_const, device)
    rn1_1 = resnet_node2(conv1.output(), c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)
    rn1_2 = resnet_node2(rn1_1.output(), c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)
    rn1_3 = resnet_node2(rn1_2.output(), c_map1, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)

    c_map2 = 32
    rn2_1_wProj = get_projection_map(c_map2, c_map1, device)
    rn2_1 = resnet_node2_inc(rn1_3.output(), c_map2, kernel_width,
                             kernel_height, conv1_w_scale, conv_b_value,
                             sc_value, bn_time_const, rn2_1_wProj, device)
    rn2_2 = resnet_node2(rn2_1.output(), c_map2, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)
    rn2_3 = resnet_node2(rn2_2.output(), c_map2, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)

    c_map3 = 64
    rn3_1_wProj = get_projection_map(c_map3, c_map2, device)
    rn3_1 = resnet_node2_inc(rn2_3.output(), c_map3, kernel_width,
                             kernel_height, conv1_w_scale, conv_b_value,
                             sc_value, bn_time_const, rn3_1_wProj, device)
    rn3_2 = resnet_node2(rn3_1.output(), c_map3, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)
    rn3_3 = resnet_node2(rn3_2.output(), c_map3, kernel_width, kernel_height,
                         conv1_w_scale, conv_b_value, sc_value, bn_time_const,
                         device)

    # Global average pooling
    poolw = 8
    poolh = 8
    poolh_stride = 1
    poolv_stride = 1

    pool = pooling(rn3_3.output(), AVG_POOLING, (1, poolh, poolw),
                   (1, poolv_stride, poolh_stride))
    out_times_params = parameter(shape=(c_map3, 1, 1, num_classes),
                                 device_id=device)
    out_bias_params = parameter(shape=(num_classes, ), device_id=device)
    t = times(pool.output(), out_times_params)
    return plus(t.output(), out_bias_params, output_name)
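Hypothetical invocation, again under the legacy .output()-style API this snippet targets:

# image = input_variable(shape=(3, 32, 32))
# net = resnet_classifer(image, num_classes=10, device=-1,
#                        output_name='prediction')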
Example #9
def create_model():

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim),
                               dynamic_axes=input_dynamic_axes,
                               name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]
    raw_labels = input_variable(shape=(label_vocab_dim),
                                dynamic_axes=label_dynamic_axes,
                                name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(
        raw_labels, 1, 0,
        name='label_sequence')  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    # Setup primer for decoder
    is_first_label = sequence.is_first(label_sequence)  # 1 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start,
                                                      is_first_label)

    # Encoder
    stabilize = Stabilizer()
    encoder_output_h = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_output_h,
         encoder_output_c) = LSTM_layer(encoder_output_h.output, hidden_dim,
                                        future_value, future_value)

    # Prepare encoder output to be used in decoder
    thought_vector_h = sequence.first(encoder_output_h)
    thought_vector_c = sequence.first(encoder_output_c)

    thought_vector_broadcast_h = sequence.broadcast_as(thought_vector_h,
                                                       label_sequence)
    thought_vector_broadcast_c = sequence.broadcast_as(thought_vector_c,
                                                       label_sequence)

    # Decoder
    decoder_history_hook = alias(
        label_sequence, name='decoder_history_hook')  # copy label_sequence

    decoder_input = element_select(is_first_label,
                                   label_sentence_start_scattered,
                                   past_value(decoder_history_hook))

    decoder_output_h = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hook_h = past_value
            recurrence_hook_c = past_value
        else:
            recurrence_hook_h = lambda operand: element_select(
                is_first_label, thought_vector_broadcast_h, past_value(operand)
            )
            recurrence_hook_c = lambda operand: element_select(
                is_first_label, thought_vector_broadcast_c, past_value(operand)
            )

        (decoder_output_h,
         decoder_output_c) = LSTM_layer(decoder_output_h.output, hidden_dim,
                                        recurrence_hook_h, recurrence_hook_c)

    # Linear output layer
    W = parameter(shape=(decoder_output_h.shape[0], label_vocab_dim),
                  init=glorot_uniform())
    B = parameter(shape=(label_vocab_dim), init=0)
    z = plus(B, times(stabilize(decoder_output_h), W))

    return z
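create_model closes over module-level configuration (input_vocab_dim, label_vocab_dim, hidden_dim, num_layers) and helpers (LSTM_layer, Stabilizer); a sketch of driving it, with the criterion wiring being an assumption rather than part of the snippet:

# input_vocab_dim = label_vocab_dim = 69
# hidden_dim, num_layers = 512, 2
# model = create_model()
# label_seq = model.find_by_name('label_sequence')
# ce = cross_entropy_with_softmax(model, label_seq)
# errs = classification_error(model, label_seq)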
Example #10
for i in range(0, num_layers):
    if (i > 0):
        recurrence_hook_h = past_value
        recurrence_hook_c = past_value
    else:
        recurrence_hook_h = lambda operand: element_select(
            is_first_label, thought_vector_broadcast_h, past_value(operand))
        recurrence_hook_c = lambda operand: element_select(
            is_first_label, thought_vector_broadcast_c, past_value(operand))

    (decoder_output_h,
     decoder_output_c) = LSTM_layer(decoder_output_h.output, hidden_dim,
                                    recurrence_hook_h, recurrence_hook_c)

# 1. Add the linear layer

W = parameter(shape=(decoder_output_h.shape[0], label_vocab_dim),
              init=glorot_uniform())
B = parameter(shape=(label_vocab_dim), init=0)
z = plus(B, times(decoder_output_h, W))


def create_model():

    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    input_dynamic_axes = [batch_axis, input_seq_axis]
    raw_input = input_variable(shape=(input_vocab_dim),
                               dynamic_axes=input_dynamic_axes,
                               name='raw_input')

    label_dynamic_axes = [batch_axis, label_seq_axis]