def pre_block(input):
    # 32 x 32 x 3
    conv1a = conv_bn_relu(input, (3, 3), 16, (1, 1))
    # 32 x 32 x 16
    conv1b = conv_bn_relu(conv1a, (3, 3), 16, (1, 1))
    # 32 x 32 x 16
    conv1c = conv_bn_relu(conv1b, (3, 3), 16, (1, 1))
    c1 = MaxPooling((3, 3), strides=(1, 1), pad=True)(conv1c)
    c2 = conv_bn_relu(conv1c, (3, 3), 16, (1, 1))
    d = splice(c1, c2, axis=0)
    # 32 x 32 x 32
    e1 = conv_bn_relu(d, (1, 1), 32, (1, 1))
    e2 = conv_bn_relu(e1, (3, 3), 32, (1, 1))
    f1 = conv_bn_relu(d, (1, 1), 32, (1, 1))
    f2 = conv_bn_relu(f1, (3, 1), 32, (1, 1))
    f3 = conv_bn_relu(f2, (1, 3), 32, (1, 1))
    f4 = conv_bn_relu(f3, (3, 3), 32, (1, 1))
    g = splice(e2, f4, axis=0)
    # 32 x 32 x 64
    h1 = conv_bn_relu(g, (3, 3), 64, (1, 1))
    i1 = MaxPooling((3, 3), strides=(1, 1), pad=True)(g)
    out = splice(h1, i1, axis=0)
    # 32 x 32 x 128
    return out

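# A plausible definition of the conv_bn_relu helper used above, reconstructed
# from its call sites. This is an assumption, not part of the original source:
# convolution, then batch normalization, then ReLU, via CNTK's layers API.
from cntk.layers import Convolution, BatchNormalization
from cntk.ops import relu

def conv_bn_relu(input, filter_shape, num_filters, strides=(1, 1)):
    conv = Convolution(filter_shape, num_filters, activation=None,
                       pad=True, strides=strides, bias=False)(input)
    bn = BatchNormalization(map_rank=1)(conv)
    return relu(bn)
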
def pre_block(input, bnTimeConst):
    # 32 x 32 x 3
    conv1a = conv_bn_relu_layer(input, 32, (3, 3), (1, 1), True, bnTimeConst)
    # 32 x 32 x 32
    conv1b = conv_bn_relu_layer(conv1a, 32, (3, 3), (1, 1), True, bnTimeConst)
    # 32 x 32 x 32
    conv1c = conv_bn_relu_layer(conv1b, 64, (3, 3), (1, 1), True, bnTimeConst)
    c1 = MaxPooling((3, 3), strides=(1, 1), pad=True)(conv1c)
    c2 = conv_bn_relu_layer(conv1c, 96, (3, 3), (1, 1), True, bnTimeConst)
    d = splice(c1, c2, axis=0)
    e1 = conv_bn_relu_layer(d, 64, (1, 1), (1, 1), True, bnTimeConst)
    e2 = conv_bn_relu_layer(e1, 96, (3, 3), (1, 1), True, bnTimeConst)
    f1 = conv_bn_relu_layer(d, 64, (1, 1), (1, 1), True, bnTimeConst)
    f2 = conv_bn_relu_layer(f1, 64, (3, 1), (1, 1), True, bnTimeConst)
    f3 = conv_bn_relu_layer(f2, 64, (1, 3), (1, 1), True, bnTimeConst)
    f4 = conv_bn_relu_layer(f3, 96, (3, 3), (1, 1), True, bnTimeConst)
    g = splice(e2, f4, axis=0)
    h1 = conv_bn_relu_layer(g, 128, (3, 3), (1, 1), True, bnTimeConst)
    i1 = MaxPooling((3, 3), strides=(1, 1), pad=True)(g)
    out = splice(h1, i1, axis=0)
    return out

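# A plausible definition of conv_bn_relu_layer, matching the
# (input, num_filters, filter_size, strides, pad, bnTimeConst) call signature
# used throughout these blocks. This is an assumption; the real helper lives
# elsewhere in the source tree.
from cntk.initializer import he_normal
from cntk.layers import Convolution, BatchNormalization
from cntk.ops import relu

def conv_bn_layer(input, num_filters, filter_size, strides=(1, 1), pad=True,
                  bnTimeConst=4096, init=he_normal()):
    conv = Convolution(filter_size, num_filters, activation=None, init=init,
                       pad=pad, strides=strides, bias=False)(input)
    return BatchNormalization(map_rank=1,
                              normalization_time_constant=bnTimeConst)(conv)

def conv_bn_relu_layer(input, num_filters, filter_size, strides=(1, 1),
                       pad=True, bnTimeConst=4096, init=he_normal()):
    return relu(conv_bn_layer(input, num_filters, filter_size, strides, pad,
                              bnTimeConst, init))
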
def inception_block_5(input, num1x1, num3x3, num3x3_3x3, numPool, bnTimeConst):
    # 1x1 Convolution
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1,1), (1,1), True, bnTimeConst)

    # 3x3 Convolution
    branch3x3_1 = conv_bn_relu_layer(input, num3x3[0], (1,1), (1,1), True, bnTimeConst)
    branch3x3_2 = conv_bn_relu_layer(branch3x3_1, num3x3[1], (1,3), (1,1), True, bnTimeConst)
    branch3x3_3 = conv_bn_relu_layer(branch3x3_1, num3x3[2], (3,1), (1,1), True, bnTimeConst)
    branch3x3 = splice(branch3x3_2, branch3x3_3, axis=0)

    # 3x3 3x3 Convolution
    branch3x3_3x3_1 = conv_bn_relu_layer(input, num3x3_3x3[0], (1,1), (1,1), True, bnTimeConst)
    branch3x3_3x3_2 = conv_bn_relu_layer(branch3x3_3x3_1, num3x3_3x3[1], (3,3), (1,1), True, bnTimeConst)
    branch3x3_3x3_3 = conv_bn_relu_layer(branch3x3_3x3_2, num3x3_3x3[2], (1,3), (1,1), True, bnTimeConst)
    branch3x3_3x3_4 = conv_bn_relu_layer(branch3x3_3x3_2, num3x3_3x3[3], (3,1), (1,1), True, bnTimeConst)
    branch3x3_3x3 = splice(branch3x3_3x3_3, branch3x3_3x3_4, axis=0)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3,3), strides=(1,1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1,1), (1,1), True, bnTimeConst)

    out = splice(branch1x1, branch3x3, branch3x3_3x3, branchPool, axis=0)
    return out

def inception_block_with_avgpool(input, num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnTimeConst):
    # 1x1 Convolution
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1, 1), (1, 1), True, bnTimeConst)

    # 3x3 Convolution
    branch3x3_reduce = conv_bn_relu_layer(input, num3x3r, (1, 1), (1, 1), True, bnTimeConst)
    branch3x3 = conv_bn_relu_layer(branch3x3_reduce, num3x3, (3, 3), (1, 1), True, bnTimeConst)

    # Double 3x3 Convolution
    branch3x3dbl_reduce = conv_bn_relu_layer(input, num3x3dblr, (1, 1), (1, 1), True, bnTimeConst)
    branch3x3dbl_conv = conv_bn_relu_layer(branch3x3dbl_reduce, num3x3dbl, (3, 3), (1, 1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_conv, num3x3dbl, (3, 3), (1, 1), True, bnTimeConst)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3, 3), strides=(1, 1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1, 1), (1, 1), True, bnTimeConst)

    out = splice(branch1x1, branch3x3, branch3x3dbl, branchPool, axis=0)
    return out

def inception_block_1(input, num1x1, num5x5, num3x3dbl, numPool, bnTimeConst):
    # 1x1
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1, 1), (1, 1), True, bnTimeConst)

    # 1x1 -> 5x5
    branch5x5_1 = conv_bn_relu_layer(input, num5x5[0], (1, 1), (1, 1), True, bnTimeConst)
    branch5x5 = conv_bn_relu_layer(branch5x5_1, num5x5[1], (5, 5), (1, 1), True, bnTimeConst)

    # 1x1 -> 3x3 -> 3x3
    branch3x3dbl_1 = conv_bn_relu_layer(input, num3x3dbl[0], (1, 1), (1, 1), True, bnTimeConst)
    branch3x3dbl_2 = conv_bn_relu_layer(branch3x3dbl_1, num3x3dbl[1], (3, 3), (1, 1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_2, num3x3dbl[2], (3, 3), (1, 1), True, bnTimeConst)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3, 3), strides=(1, 1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1, 1), (1, 1), True, bnTimeConst)

    out = splice(branch1x1, branch5x5, branch3x3dbl, branchPool, axis=0)
    return out

def inception_block_3(input, num1x1, num7x7, num7x7dbl, numPool, bnTimeConst):
    # 1x1 Convolution
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1, 1), (1, 1), True, bnTimeConst)

    # 1x1 -> 1x3 -> 3x1
    branch7x7_1 = conv_bn_relu_layer(input, num7x7[0], (1, 1), (1, 1), True, bnTimeConst)
    branch7x7_2 = conv_bn_relu_layer(branch7x7_1, num7x7[1], (1, 3), (1, 1), True, bnTimeConst)
    branch7x7 = conv_bn_relu_layer(branch7x7_2, num7x7[2], (3, 1), (1, 1), True, bnTimeConst)

    # 1x1 -> 1x3 -> 3x1 -> 1x3 -> 3x1
    branch7x7dbl_1 = conv_bn_relu_layer(input, num7x7dbl[0], (1, 1), (1, 1), True, bnTimeConst)
    branch7x7dbl_2 = conv_bn_relu_layer(branch7x7dbl_1, num7x7dbl[1], (3, 1), (1, 1), True, bnTimeConst)
    branch7x7dbl_3 = conv_bn_relu_layer(branch7x7dbl_2, num7x7dbl[2], (1, 3), (1, 1), True, bnTimeConst)
    branch7x7dbl_4 = conv_bn_relu_layer(branch7x7dbl_3, num7x7dbl[3], (3, 1), (1, 1), True, bnTimeConst)
    branch7x7dbl = conv_bn_relu_layer(branch7x7dbl_4, num7x7dbl[4], (1, 3), (1, 1), True, bnTimeConst)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3, 3), strides=(1, 1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1, 1), (1, 1), True, bnTimeConst)

    out = splice(branch1x1, branch7x7, branch7x7dbl, branchPool, axis=0)
    return out

def inception_block_with_maxpool(input, num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnTimeConst):
    # 1x1
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1, 1), (1, 1), True, bnTimeConst)

    # 1x1 -> 3x3
    branch3x3_reduce = conv_bn_relu_layer(input, num3x3r, (1, 1), (1, 1), True, bnTimeConst)
    branch3x3 = conv_bn_relu_layer(branch3x3_reduce, num3x3, (3, 3), (1, 1), True, bnTimeConst)

    # 1x1 -> 3x3 -> 3x3
    branch3x3dbl_reduce = conv_bn_relu_layer(input, num3x3dblr, (1, 1), (1, 1), True, bnTimeConst)
    branch3x3dbl_conv = conv_bn_relu_layer(branch3x3dbl_reduce, num3x3dbl, (3, 3), (1, 1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_conv, num3x3dbl, (3, 3), (1, 1), True, bnTimeConst)

    # max pooling -> 1x1
    branchPool_maxpool = MaxPooling((3, 3), strides=(1, 1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_maxpool, numPool, (1, 1), (1, 1), True, bnTimeConst)

    out = splice(branch1x1, branch3x3, branch3x3dbl, branchPool, axis=0)
    return out

def attention_model(context_memory, query_memory, init_status, hidden_dim, att_dim, max_steps=5, init=glorot_uniform()):
    """
    Create the attention model for ReasoNet

    Args:
      context_memory: Context memory
      query_memory: Query memory
      init_status: Initial status
      hidden_dim: The dimension of the hidden state
      att_dim: The dimension of the attention
      max_steps: Maximum number of steps to revisit the context memory
    """
    gru = gru_cell((hidden_dim * 2, ), name='control_status')
    status = init_status
    output = [None] * max_steps * 2
    context_cos_sim = project_cosine_sim(att_dim, name='context_attention')
    query_cos_sim = project_cosine_sim(att_dim, name='query_attention')
    ans_cos_sim = project_cosine_sim(att_dim, name='candidate_attention')
    stop_gate = termination_gate(name='terminate_prob')
    prev_stop = 0
    for step in range(max_steps):
        context_attention_weight = context_cos_sim(status, context_memory)
        query_attention_weight = query_cos_sim(status, query_memory)
        context_attention = sequence.reduce_sum(times(context_attention_weight, context_memory), name='C-Att')
        query_attention = sequence.reduce_sum(times(query_attention_weight, query_memory), name='Q-Att')
        attention = ops.splice(query_attention, context_attention, name='att-sp')
        status = gru(attention, status).output
        termination_prob = stop_gate(status)
        ans_attention = ans_cos_sim(status, context_memory)
        output[step * 2] = ans_attention
        if step < max_steps - 1:
            stop_prob = prev_stop + ops.log(termination_prob, name='log_stop')
        else:
            stop_prob = prev_stop
        output[step * 2 + 1] = sequence.broadcast_as(
            ops.exp(stop_prob, name='exp_log_stop'),
            output[step * 2],
            name='Stop_{0}'.format(step))
        prev_stop += ops.log(1 - termination_prob, name='log_non_stop')
    final_ans = None
    for step in range(max_steps):
        if final_ans is None:
            final_ans = output[step * 2] * output[step * 2 + 1]
        else:
            final_ans += output[step * 2] * output[step * 2 + 1]
    combine_func = combine(output + [final_ans], name='Attention_func')
    return combine_func

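# Illustration (not from the source) of how the per-step halting weights built
# above combine. With termination probabilities t_1..t_K, the weight placed on
# step k is prod_{i<k}(1 - t_i) * t_k for k < K, and the final step keeps the
# remaining mass, so the weights always sum to 1.
def halting_weights(t):
    """t: list of per-step termination probabilities."""
    K = len(t)
    weights, carry = [], 1.0
    for k in range(K):
        if k < K - 1:
            weights.append(carry * t[k])
        else:
            weights.append(carry)  # last step absorbs the leftover probability
        carry *= 1.0 - t[k]
    return weights

assert abs(sum(halting_weights([0.3, 0.5, 0.2])) - 1.0) < 1e-12
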
def reduction_A(input, b1, c1, c2, c3):
    A1 = MaxPooling((3, 3), strides=(2, 2), pad=True)(input)
    B1 = conv_bn_relu(input, (3, 3), b1, (2, 2))
    C1 = conv_bn_relu(input, (1, 1), c1, (1, 1))
    C2 = conv_bn_relu(C1, (3, 3), c2, (1, 1))
    C3 = conv_bn_relu(C2, (3, 3), c3, (2, 2))
    out = splice(A1, B1, C3, axis=0)
    return out

def _convolution(x):
    if group == 1:
        apply_x = _conv_ops(w, x)
    else:
        # split the input along the channel axis into `group` equal chunks
        groups_data = [ops.slice(x, axis=0, begin_index=i * sub_input_channels,
                                 end_index=(i + 1) * sub_input_channels)
                       for i in range(0, group)]
        # convolve each chunk with its own kernel, then concatenate the results
        apply_sub = [_conv_ops(group_kernel, group_data)
                     for group_kernel, group_data in zip(groups_kernel, groups_data)]
        apply_x = ops.splice(*apply_sub, axis=0)
    if bias_init is not None:
        apply_x += b
    return apply_x

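# A small standalone illustration (an assumption, not from the source) of the
# channel grouping performed above: with 4 input channels and group=2, each
# sub-convolution sees its own half of the channels, exactly the slices that
# ops.slice carves out.
import numpy as np

x = np.zeros((4, 8, 8))                # 4 channels, 8x8 spatial
group, sub_input_channels = 2, 4 // 2
chunks = [x[i * sub_input_channels:(i + 1) * sub_input_channels]
          for i in range(group)]
assert [c.shape for c in chunks] == [(2, 8, 8), (2, 8, 8)]
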
def reduction_A(input, b1, c1, c2, c3, bnTimeConst):
    A1 = MaxPooling((3, 3), strides=(2, 2), pad=True)(input)
    B1 = conv_bn_relu_layer(input, b1, (3, 3), (2, 2), True, bnTimeConst)
    C1 = conv_bn_relu_layer(input, c1, (1, 1), (1, 1), True, bnTimeConst)
    C2 = conv_bn_relu_layer(C1, c2, (3, 3), (1, 1), True, bnTimeConst)
    C3 = conv_bn_relu_layer(C2, c3, (3, 3), (2, 2), True, bnTimeConst)
    out = splice(A1, B1, C3, axis=0)
    return out

def Res_C(input, n, m):
    A1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    B1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    B2 = conv_bn(B1, (1,3), n, bn_init_scale=1)
    B3 = conv_bn(B2, (3,1), n, bn_init_scale=1)
    C = splice(A1, B3, axis=0)
    D = conv_bn(C, (1,1), m, bn_init_scale=1)
    p = D + input
    return relu(p)

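# A plausible conv_bn helper matching the call sites in these residual blocks
# (an assumption, not part of the original source): convolution followed by
# batch normalization whose scale parameter starts at bn_init_scale, with no
# ReLU, so the block can add the residual before activating.
from cntk.layers import Convolution, BatchNormalization

def conv_bn(input, filter_shape, num_filters, strides=(1, 1), bn_init_scale=1):
    conv = Convolution(filter_shape, num_filters, activation=None,
                       pad=True, strides=strides, bias=False)(input)
    return BatchNormalization(map_rank=1, init_scale=bn_init_scale)(conv)
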
def mobilenet_basic(input, num_filters):
    # apply a separate single-filter 3x3 convolution to each input channel
    # (a depthwise step), concatenating the per-channel outputs
    num3x3 = input.shape[0]
    l_in = input[0]
    c = conv_bn_relu(l_in, (3, 3), 1)
    for i in range(1, num3x3):
        l = input[i]
        l_out = conv_bn_relu(l, (3, 3), 1)
        c = splice(c, l_out, axis=0)
    c1 = conv_bn_relu(c, (3, 3), 1)
    # pointwise 1x1 convolution mixes the result into num_filters channels
    c2 = conv_bn_relu(c1, (1, 1), num_filters)
    return c2

def Res_C(input, a1, b1, b2, b3, c1):
    A1 = conv_bn(input, (1, 1), a1, bn_init_scale=1)
    B1 = conv_bn(input, (1, 1), b1, bn_init_scale=1)
    B2 = conv_bn(B1, (1, 3), b2, bn_init_scale=1)
    B3 = conv_bn(B2, (3, 1), b3, bn_init_scale=1)
    C = splice(A1, B3, axis=0)
    D = conv_bn(C, (1, 1), c1, bn_init_scale=1)
    p = D + input
    return relu(p)

def splice(cntk_layer, inputs):
    '''
    Set up a splice op with the given parameters

    Args:
        cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
            the layer definition of the splice op
        inputs (list): a list containing all :class:`~cntk.ops.functions.Function` or
            :class:`~cntk.input` inputs

    Return:
        :func:`~cntk.ops.functions.Function`: the instantiated CNTK splice op
    '''
    return ops.splice(*inputs, axis=0, name=cntk_layer.op_name)

def BNBiRecurrence(fwd, bwd, test_dual=True): # special version that calls one shared BN instance at two places, for testing BN param tying
    F = Recurrence(fwd)
    G = Recurrence(bwd, go_backwards=True)
    BN = BatchNormalization(normalization_time_constant=-1)
    x = Placeholder()
    # The following code applies the same BN function object twice.
    # When running whole-corpus estimation of means/vars, this must lead to the same estimate
    # although it is estimated on twice the amount of data (each sample is used twice).
    # Hence, this is the test that proves that the parameter sharing works.
    x1 = BN(x)
    x2 = BN(x) if test_dual else x1
    # In double precision with corpus aggregation, these lead to the same result.
    apply_x = splice(F(x1), G(x2))
    return apply_x

def BNBiRecurrence(fwd, bwd, test_dual=True): # special version that calls one shared BN instance at two places, for testing BN param tying
    F = Recurrence(fwd)
    G = Recurrence(bwd, go_backwards=True)
    BN = BatchNormalization(normalization_time_constant=-1)
    x = placeholder()
    # The following code applies the same BN function object twice.
    # When running whole-corpus estimation of means/vars, this must lead to the same estimate
    # although it is estimated on twice the amount of data (each sample is used twice).
    # Hence, this is the test that proves that the parameter sharing works.
    x1 = BN(x)
    x2 = BN(x) if test_dual else x1
    # In double precision with corpus aggregation, these lead to the same result.
    apply_x = splice(F(x1), G(x2))
    return apply_x

def Res_A(input, a1, b1, c1, c2, c3, d1):
    A1 = conv_bn_relu(input, (1, 1), a1)
    B1 = conv_bn(input, (1, 1), b1, bn_init_scale=1)
    B2 = conv_bn(B1, (3, 3), b1, bn_init_scale=1)
    C1 = conv_bn(input, (1, 1), c1, bn_init_scale=1)
    C2 = conv_bn(C1, (3, 3), c2, bn_init_scale=1)
    C3 = conv_bn(C2, (3, 3), c3, bn_init_scale=1)
    out = splice(A1, B2, C3, axis=0)
    out2 = conv_bn(out, (1, 1), d1, bn_init_scale=1)
    p = out2 + input
    return relu(p)

def Inception_A(input, a1, b1, c1, c2, d1, d2, bnTimeConst):
    A1 = AveragePooling((3, 3), strides=(1, 1), pad=True)(input)
    A2 = conv_bn_relu_layer(A1, a1, (3, 3), (1, 1), True, bnTimeConst)
    B1 = conv_bn_relu_layer(input, b1, (1, 1), (1, 1), True, bnTimeConst)
    C1 = conv_bn_relu_layer(input, c1, (1, 1), (1, 1), True, bnTimeConst)
    C2 = conv_bn_relu_layer(C1, c2, (3, 3), (1, 1), True, bnTimeConst)
    D1 = conv_bn_relu_layer(input, d1, (1, 1), (1, 1), True, bnTimeConst)
    D2 = conv_bn_relu_layer(D1, d2, (3, 3), (1, 1), True, bnTimeConst)
    D3 = conv_bn_relu_layer(D2, d2, (3, 3), (1, 1), True, bnTimeConst)
    out = splice(A2, B1, C2, D3, axis=0)
    return out

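# Width note (derived from the splice above; the concrete numbers are an
# illustrative assumption, not from the source): the block emits
# a1 + b1 + c2 + d2 channels, e.g. 96 + 96 + 96 + 96 = 384 with typical
# Inception-v4 Inception-A settings.
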
def inception_block_2(input, num3x3, num3x3dbl, bnTimeConst):
    # 3x3 Convolution
    branch3x3 = conv_bn_relu_layer(input, num3x3, (3,3), (2,2), True, bnTimeConst)

    # Double 3x3 Convolution
    branch3x3dbl_1 = conv_bn_relu_layer(input, num3x3dbl[0], (1,1), (1,1), True, bnTimeConst)
    branch3x3dbl_2 = conv_bn_relu_layer(branch3x3dbl_1, num3x3dbl[1], (3,3), (1,1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_2, num3x3dbl[2], (3,3), (2,2), True, bnTimeConst)

    # Max Pooling
    branchPool = MaxPooling((3,3), strides=(2,2), pad=True)(input)

    out = splice(branch3x3, branch3x3dbl, branchPool, axis=0)
    return out

def Res_A(input, n, m):
    a1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    b1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    b2 = conv_bn(b1, (3,3), n, bn_init_scale=1)
    c1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    c2 = conv_bn(c1, (3,3), n, bn_init_scale=1)
    c3 = conv_bn(c2, (3,3), n, bn_init_scale=1)
    out = splice(a1, b2, c3, axis=0)
    out2 = conv_bn(out, (1,1), m, bn_init_scale=1)
    p = out2 + input
    return relu(p)

def reduction_B(input, b1, b2, c1, c2, d1, d2, d3):
    A1 = MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(input)
    B1 = conv_bn_relu(input, (1, 1), b1, (1, 1))
    B2 = conv_bn_relu(B1, (3, 3), b2, (2, 2))
    C1 = conv_bn_relu(input, (1, 1), c1, (1, 1))
    C2 = conv_bn_relu(C1, (3, 3), c2, (2, 2))
    D1 = conv_bn_relu(input, (1, 1), d1, (1, 1))
    D2 = conv_bn_relu(D1, (3, 3), d2, (1, 1))
    D3 = conv_bn_relu(D2, (3, 3), d3, (2, 2))
    out = splice(A1, B2, C2, D3, axis=0)
    return out

def inception_block_2(input, num3x3, num3x3dbl, bnTimeConst):
    # 3x3 Convolution
    branch3x3 = conv_bn_relu_layer(input, num3x3, (3,3), (2,2), False, bnTimeConst)

    # Double 3x3 Convolution
    branch3x3dbl_1 = conv_bn_relu_layer(input, num3x3dbl[0], (1,1), (1,1), True, bnTimeConst)
    branch3x3dbl_2 = conv_bn_relu_layer(branch3x3dbl_1, num3x3dbl[1], (3,3), (1,1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_2, num3x3dbl[2], (3,3), (2,2), False, bnTimeConst)

    # Max Pooling
    branchPool = MaxPooling((3,3), strides=(2,2), pad=False)(input)

    out = splice(branch3x3, branch3x3dbl, branchPool, axis=0)
    return out

def inception_block_pass_through(input, num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnTimeConst):
    # 3x3 Convolution
    branch3x3_reduce = conv_bn_relu_layer(input, num3x3r, (1,1), (1,1), True, bnTimeConst)
    branch3x3 = conv_bn_relu_layer(branch3x3_reduce, num3x3, (3,3), (2,2), True, bnTimeConst)

    # Double 3x3 Convolution
    branch3x3dbl_reduce = conv_bn_relu_layer(input, num3x3dblr, (1,1), (1,1), True, bnTimeConst)
    branch3x3dbl_conv = conv_bn_relu_layer(branch3x3dbl_reduce, num3x3dbl, (3,3), (1,1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_conv, num3x3dbl, (3,3), (2,2), True, bnTimeConst)

    # Max Pooling
    branchPool = MaxPooling((3,3), strides=(2,2), pad=True)(input)

    out = splice(branch3x3, branch3x3dbl, branchPool, axis=0)
    return out

def inception_block_4(input, num3x3, num7x7_3x3, bnTimeConst):
    # 3x3 Convolution
    branch3x3_1 = conv_bn_relu_layer(input, num3x3[0], (1,1), (1,1), True, bnTimeConst)
    branch3x3 = conv_bn_relu_layer(branch3x3_1, num3x3[1], (3,3), (2,2), False, bnTimeConst)

    # 7x7 3x3 Convolution
    branch7x7_3x3_1 = conv_bn_relu_layer(input, num7x7_3x3[0], (1,1), (1,1), True, bnTimeConst)
    branch7x7_3x3_2 = conv_bn_relu_layer(branch7x7_3x3_1, num7x7_3x3[1], (1,7), (1,1), True, bnTimeConst)
    branch7x7_3x3_3 = conv_bn_relu_layer(branch7x7_3x3_2, num7x7_3x3[2], (7,1), (1,1), True, bnTimeConst)
    branch7x7_3x3 = conv_bn_relu_layer(branch7x7_3x3_3, num7x7_3x3[3], (3,3), (2,2), False, bnTimeConst)

    # Max Pooling
    branchPool = MaxPooling((3,3), strides=(2,2), pad=False)(input)

    out = splice(branch3x3, branch7x7_3x3, branchPool, axis=0)
    return out

def _convolution(x):
    if group == 1:
        apply_x = _conv_ops(w, x)
    else:
        groups_data = [
            ops.slice(x, axis=0, begin_index=i * sub_input_channels,
                      end_index=(i + 1) * sub_input_channels)
            for i in range(0, group)
        ]
        apply_sub = [
            _conv_ops(group_kernel, group_data)
            for group_kernel, group_data in zip(groups_kernel, groups_data)
        ]
        apply_x = ops.splice(*apply_sub, axis=0)
    if bias_init is not None:
        apply_x += b
    return apply_x

def inception_block_with_avgpool(input, num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnTimeConst):
    # 1x1 Convolution
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1,1), (1,1), True, bnTimeConst)

    # 3x3 Convolution
    branch3x3_reduce = conv_bn_relu_layer(input, num3x3r, (1,1), (1,1), True, bnTimeConst)
    branch3x3 = conv_bn_relu_layer(branch3x3_reduce, num3x3, (3,3), (1,1), True, bnTimeConst)

    # Double 3x3 Convolution
    branch3x3dbl_reduce = conv_bn_relu_layer(input, num3x3dblr, (1,1), (1,1), True, bnTimeConst)
    branch3x3dbl_conv = conv_bn_relu_layer(branch3x3dbl_reduce, num3x3dbl, (3,3), (1,1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_conv, num3x3dbl, (3,3), (1,1), True, bnTimeConst)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3,3), strides=(1,1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1,1), (1,1), True, bnTimeConst)

    out = splice(branch1x1, branch3x3, branch3x3dbl, branchPool, axis=0)
    return out

def inception_block_1(input, num1x1, num5x5, num3x3dbl, numPool, bnTimeConst):
    # 1x1 Convolution
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1,1), (1,1), True, bnTimeConst)

    # 5x5 Convolution
    branch5x5_1 = conv_bn_relu_layer(input, num5x5[0], (1,1), (1,1), True, bnTimeConst)
    branch5x5 = conv_bn_relu_layer(branch5x5_1, num5x5[1], (5,5), (1,1), True, bnTimeConst)

    # Double 3x3 Convolution
    branch3x3dbl_1 = conv_bn_relu_layer(input, num3x3dbl[0], (1,1), (1,1), True, bnTimeConst)
    branch3x3dbl_2 = conv_bn_relu_layer(branch3x3dbl_1, num3x3dbl[1], (3,3), (1,1), True, bnTimeConst)
    branch3x3dbl = conv_bn_relu_layer(branch3x3dbl_2, num3x3dbl[2], (3,3), (1,1), True, bnTimeConst)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3,3), strides=(1,1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1,1), (1,1), True, bnTimeConst)

    out = splice(branch1x1, branch5x5, branch3x3dbl, branchPool, axis=0)
    return out

def Res_B(input, n, m):
    a1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    b1 = conv_bn(input, (1,1), n, bn_init_scale=1)
    b2 = conv_bn(b1, (1,3), n, bn_init_scale=1)
    b3 = conv_bn(b2, (3,1), n, bn_init_scale=1)
    c = splice(a1, b3, axis=0)
    d = conv_bn(c, (1,1), m, bn_init_scale=1)
    p = d + input
    return relu(p)

def inception_block2(input, num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnTimeConst):
    # 1x1
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1,1), (1,1), True, bnTimeConst)

    # 1x1 -> 1x3 -> 3x1
    branch3x3_reduce = conv_bn_relu_layer(input, num3x3r, (1,1), (1,1), True, bnTimeConst)
    branch3x3_conv1 = conv_bn_relu_layer(branch3x3_reduce, num3x3, (1,3), (1,1), True, bnTimeConst)
    branch3x3_conv2 = conv_bn_relu_layer(branch3x3_conv1, num3x3, (3,1), (1,1), True, bnTimeConst)

    # 1x1 -> 1x3 -> 3x1 -> 1x3 -> 3x1
    branch3x3dbl_reduce = conv_bn_relu_layer(input, num3x3dblr, (1,1), (1,1), True, bnTimeConst)
    branch3x3dbl_conv1 = conv_bn_relu_layer(branch3x3dbl_reduce, num3x3dbl, (1,3), (1,1), True, bnTimeConst)
    branch3x3dbl_conv2 = conv_bn_relu_layer(branch3x3dbl_conv1, num3x3dbl, (3,1), (1,1), True, bnTimeConst)
    branch3x3dbl_conv3 = conv_bn_relu_layer(branch3x3dbl_conv2, num3x3dbl, (1,3), (1,1), True, bnTimeConst)
    branch3x3dbl_conv4 = conv_bn_relu_layer(branch3x3dbl_conv3, num3x3dbl, (3,1), (1,1), True, bnTimeConst)

    # avg pooling -> 1x1
    branchPool_avgpool = AveragePooling((3,3), strides=(1,1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1,1), (1,1), True, bnTimeConst)

    out = splice(branch1x1, branch3x3_conv2, branch3x3dbl_conv4, branchPool, axis=0)
    return out

def inception_block_3(input, num1x1, num7x7, num7x7dbl, numPool, bnTimeConst):
    # 1x1 Convolution
    branch1x1 = conv_bn_relu_layer(input, num1x1, (1,1), (1,1), True, bnTimeConst)

    # 7x7 Convolution
    branch7x7_1 = conv_bn_relu_layer(input, num7x7[0], (1,1), (1,1), True, bnTimeConst)
    branch7x7_2 = conv_bn_relu_layer(branch7x7_1, num7x7[1], (1,7), (1,1), True, bnTimeConst)
    branch7x7 = conv_bn_relu_layer(branch7x7_2, num7x7[2], (7,1), (1,1), True, bnTimeConst)

    # Double 7x7 Convolution
    branch7x7dbl_1 = conv_bn_relu_layer(input, num7x7dbl[0], (1,1), (1,1), True, bnTimeConst)
    branch7x7dbl_2 = conv_bn_relu_layer(branch7x7dbl_1, num7x7dbl[1], (7,1), (1,1), True, bnTimeConst)
    branch7x7dbl_3 = conv_bn_relu_layer(branch7x7dbl_2, num7x7dbl[2], (1,7), (1,1), True, bnTimeConst)
    branch7x7dbl_4 = conv_bn_relu_layer(branch7x7dbl_3, num7x7dbl[3], (7,1), (1,1), True, bnTimeConst)
    branch7x7dbl = conv_bn_relu_layer(branch7x7dbl_4, num7x7dbl[4], (1,7), (1,1), True, bnTimeConst)

    # Average Pooling
    branchPool_avgpool = AveragePooling((3,3), strides=(1,1), pad=True)(input)
    branchPool = conv_bn_relu_layer(branchPool_avgpool, numPool, (1,1), (1,1), True, bnTimeConst)

    out = splice(branch1x1, branch7x7, branch7x7dbl, branchPool, axis=0)
    return out

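# Design note on the 1x7/7x1 pairs above: factorizing a 7x7 convolution into a
# 1x7 followed by a 7x1 keeps the same receptive field while cutting per-filter
# weights from 49 to 14, the asymmetric-factorization idea from Inception-v3.
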
def create_model(params: model_params):
    """
    Create the ReasoNet model

    Args:
      params (class:`model_params`): The parameters used to create the model
    """
    logger.log("Create model: dropout_rate: {0}, init:{1}, embedding_init: {2}".format(
        params.dropout_rate, params.init, params.embedding_init))
    # Query and Doc/Context/Paragraph inputs to the model
    query_seq_axis = Axis('sourceAxis')
    context_seq_axis = Axis('contextAxis')
    query_sequence = sequence.input(shape=(params.vocab_dim), is_sparse=True,
                                    sequence_axis=query_seq_axis, name='query')
    context_sequence = sequence.input(shape=(params.vocab_dim), is_sparse=True,
                                      sequence_axis=context_seq_axis, name='context')
    entity_ids_mask = sequence.input(shape=(1, ), is_sparse=False,
                                     sequence_axis=context_seq_axis, name='entity_ids_mask')
    # embedding
    if params.embedding_init is None:
        embedding_init = create_random_matrix(params.vocab_dim, params.embedding_dim)
    else:
        embedding_init = params.embedding_init
    embedding = parameter(shape=(params.vocab_dim, params.embedding_dim), init=None)
    embedding.value = embedding_init
    embedding_matrix = constant(embedding_init, shape=(params.vocab_dim, params.embedding_dim))

    if params.dropout_rate is not None:
        query_embedding = ops.dropout(times(query_sequence, embedding),
                                      params.dropout_rate, name='query_embedding')
        context_embedding = ops.dropout(times(context_sequence, embedding),
                                        params.dropout_rate, name='context_embedding')
    else:
        query_embedding = times(query_sequence, embedding, name='query_embedding')
        context_embedding = times(context_sequence, embedding, name='context_embedding')

    contextGruW = Parameter(_INFERRED + _as_tuple(params.hidden_dim), init=glorot_uniform(), name='gru_params')
    queryGruW = Parameter(_INFERRED + _as_tuple(params.hidden_dim), init=glorot_uniform(), name='gru_params')

    # Unlike other words in the context, entity vectors are kept fixed as random
    # vectors, so each one acts only as an identifier of a distinct entity in the
    # context and carries no semantic meaning.
    entity_embedding = ops.times(context_sequence, embedding_matrix, name='constant_entity_embedding')
    full_context_embedding = ops.element_select(entity_ids_mask, entity_embedding, context_embedding)

    context_memory = ops.optimized_rnnstack(full_context_embedding, contextGruW, params.hidden_dim,
                                            1, True, recurrent_op='gru', name='context_mem')
    query_memory = ops.optimized_rnnstack(query_embedding, queryGruW, params.hidden_dim,
                                          1, True, recurrent_op='gru', name='query_mem')
    # the initial status is the last forward state spliced with the first backward state
    qfwd = ops.slice(sequence.last(query_memory), -1, 0, params.hidden_dim, name='fwd')
    qbwd = ops.slice(sequence.first(query_memory), -1, params.hidden_dim, params.hidden_dim * 2, name='bwd')
    init_status = ops.splice(qfwd, qbwd, name='Init_Status')
    return attention_model(context_memory, query_memory, init_status,
                           params.hidden_dim, params.attention_dim, max_steps=params.max_rl_steps)

def BiRecurrence(fwd, bwd):
    F = Recurrence(fwd)
    G = Recurrence(bwd, go_backwards=True)
    x = placeholder()
    apply_x = splice(F(x), G(x))
    return apply_x

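# Hypothetical usage of BiRecurrence (not from the source; the layer widths and
# label count below are placeholders): a bidirectional GRU tagger in the style
# of CNTK's sequence-labeling examples.
from cntk.layers import Embedding, Dense, GRU, Sequential

num_labels = 129  # placeholder
tagger = Sequential([
    Embedding(150),
    BiRecurrence(GRU(150), GRU(150)),
    Dense(num_labels)
])
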
def with_lookahead():
    x = Placeholder()
    future_x = future_value(x)
    apply_x = splice(x, future_x)
    return apply_x

def BiRecurrence(fwd, bwd):
    F = Recurrence(fwd)
    G = Recurrence(bwd, go_backwards=True)
    x = Placeholder()
    apply_x = splice(F(x), G(x))
    return apply_x

def with_lookahead():
    x = placeholder()
    future_x = sequence.future_value(x)
    apply_x = splice(x, future_x)
    return apply_x

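# Hypothetical usage of with_lookahead (not from the source; the layer size is
# a placeholder): sequence.future_value shifts the sequence one step ahead, so
# splice(x, future_x) pairs every frame with its successor, giving downstream
# layers a one-frame lookahead.
from cntk.layers import Dense, Sequential

acoustic_model = Sequential([
    with_lookahead(),
    Dense(512)
])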