def gated_attention_gru_layer(self, context, query):
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))
    c_processed = C.placeholder(shape=(2*self.hidden_dim,))

    # gate weight
    Wg = C.parameter(shape=(4*self.hidden_dim, 4*self.hidden_dim))
    att_gru = C.layers.GRU(2*self.hidden_dim)
    attention_model = C.layers.AttentionModel(self.hidden_dim, name='attention_model')

    @C.Function
    def out_func0(att_input, enc_input):
        enc_input2 = enc_input

        @C.Function
        def gru_with_attention(dh, x):
            c_att = attention_model(att_input, x)
            x = C.splice(x, c_att)
            x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
            return att_gru(dh, x)

        att_context = Recurrence(gru_with_attention)(enc_input2)
        return att_context

    att_context = out_func0(q_processed, c_processed)
    return C.as_block(
        att_context,
        [(c_processed, context), (q_processed, query)],
        'gated_attention_gru_layer',
        'gated_attention_gru_layer')
def lightlstm(input_dim, cell_dim):
    x = C.placeholder(name='x')
    dh = C.placeholder(name='dh')
    dc = C.placeholder(name='dc')
    x1 = C.slice(x, -1, input_dim * 0, input_dim * 1)
    x2 = C.slice(x, -1, input_dim * 1, input_dim * 2)

    def LSTMCell(x, y, dh, dc):
        '''LightLSTM Cell'''
        b = C.parameter(shape=(4 * cell_dim), init=0)
        W = C.parameter(shape=(input_dim, 4 * cell_dim), init=glorot_uniform())
        H = C.parameter(shape=(cell_dim, 4 * cell_dim), init=glorot_uniform())

        # projected contribution from input x, hidden, and bias
        proj4 = b + C.times(x, W) + C.times(dh, H)

        it_proj = C.slice(proj4, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj = C.slice(proj4, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj = C.slice(proj4, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj = C.slice(proj4, -1, 3 * cell_dim, 4 * cell_dim)

        it = C.sigmoid(it_proj)        # input gate
        bit = it * C.tanh(bit_proj)
        ft = C.sigmoid(ft_proj)        # forget gate
        bft = ft * dc
        ct = bft + bit
        ot = C.sigmoid(ot_proj)        # output gate
        ht = ot * C.tanh(ct)

        # projected contribution from input y, hidden, and bias
        proj4_2 = b + C.times(y, W) + C.times(ht, H)

        it_proj_2 = C.slice(proj4_2, -1, 0 * cell_dim, 1 * cell_dim)
        bit_proj_2 = C.slice(proj4_2, -1, 1 * cell_dim, 2 * cell_dim)
        ft_proj_2 = C.slice(proj4_2, -1, 2 * cell_dim, 3 * cell_dim)
        ot_proj_2 = C.slice(proj4_2, -1, 3 * cell_dim, 4 * cell_dim)

        it_2 = C.sigmoid(it_proj_2)        # input gate
        bit_2 = it_2 * C.tanh(bit_proj_2)
        ft_2 = C.sigmoid(ft_proj_2)        # forget gate
        bft_2 = ft_2 * ct
        ct2 = bft_2 + bit_2
        ot_2 = C.sigmoid(ot_proj_2)        # output gate
        ht2 = ot_2 * C.tanh(ct2)
        return (ht, ct, ht2, ct2)

    Cell = LSTMCell(x1, x2, dh, dc)

    actualDh = past_value(Cell[2])
    actualDc = past_value(Cell[3])

    Cell[0].replace_placeholders(
        {dh: actualDh.output, dc: actualDc.output})

    return C.splice(Cell[0], Cell[2], axis=-1)
def input_layer(self, cgw, cnw, cc, qgw, qnw, qc):
    cgw_ph = C.placeholder()
    cnw_ph = C.placeholder()
    cc_ph = C.placeholder()
    qgw_ph = C.placeholder()
    qnw_ph = C.placeholder()
    qc_ph = C.placeholder()

    input_chars = C.placeholder(shape=(1, self.word_size, self.c_dim))
    input_glove_words = C.placeholder(shape=(self.wg_dim,))
    input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

    # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
    # todo GlobalPooling/reduce_max should have a keepdims default to False
    embedded = C.splice(
        C.reshape(self.charcnn(input_chars), self.convs),
        self.embed()(input_glove_words, input_nonglove_words),
        name='splice_embed')

    processed = C.layers.Sequential([For(range(2),
        lambda: OptimizedRnnStack(self.hidden_dim,
                                  bidirectional=True,
                                  use_cudnn=self.use_cudnn,
                                  name='input_rnn'))])(embedded)

    qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
    cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)

    q_processed = processed.clone(C.CloneMethod.share,
        {input_chars: qce, input_glove_words: qgw_ph, input_nonglove_words: qnw_ph})
    c_processed = processed.clone(C.CloneMethod.share,
        {input_chars: cce, input_glove_words: cgw_ph, input_nonglove_words: cnw_ph})

    return C.as_block(
        C.combine([c_processed, q_processed]),
        [(cgw_ph, cgw), (cnw_ph, cnw), (cc_ph, cc), (qgw_ph, qgw), (qnw_ph, qnw), (qc_ph, qc)],
        'input_layer',
        'input_layer')
def output_layer(self, query, match_context):
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))
    mat_context = C.placeholder(shape=(2*self.hidden_dim,))

    # output layer
    r_q = question_pooling(q_processed, 2*self.hidden_dim)  # shape n*(2*self.hidden_dim)
    p1_logits = attention_weight(mat_context, r_q, 2*self.hidden_dim)
    attention_pool = C.sequence.reduce_sum(p1_logits * mat_context)
    state = C.layers.GRU(2*self.hidden_dim)(attention_pool, r_q)
    p2_logits = attention_weight(mat_context, state, 2*self.hidden_dim)

    @C.Function
    def start_ave_point(p1_logits, p2_logits, point):
        @C.Function
        def start_ave(last, now):
            now = now + last - last
            new_start = now * C.sequence.gather(p2_logits, point)
            point = C.sequence.future_value(point)
            return new_start
        start_logits_ave = C.layers.Recurrence(start_ave)(p1_logits)
        return start_logits_ave

    point = C.sequence.is_first(p1_logits)
    point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus))])(point)
    point = C.greater(C.constant(16), point)
    start_logits_ave = start_ave_point(p1_logits, p2_logits, point)

    @C.Function
    def end_ave_point(p1_logits, p2_logits, point):
        @C.Function
        def end_ave(last, now):
            now = now + last - last
            new_end = now * C.sequence.gather(p2_logits, point)
            point = C.sequence.past_value(point)
            return new_end
        end_logits_ave = C.layers.Recurrence(end_ave, go_backwards=True)(p2_logits)
        return end_logits_ave

    point = C.sequence.is_last(p1_logits)
    point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus, go_backwards=True))])(point)
    point = C.greater(C.constant(16), point)
    end_logits_ave = end_ave_point(p1_logits, p2_logits, point)

    start_logits = seq_hardmax(start_logits_ave)
    end_logits = seq_hardmax(end_logits_ave)
    '''
    start_logits = seq_hardmax(p1_logits)
    end_logits = seq_hardmax(p2_logits)
    '''

    return C.as_block(
        C.combine([start_logits, end_logits]),
        [(q_processed, query), (mat_context, match_context)],
        'output_layer',
        'output_layer')
def test_get_data_type():
    pa32 = C.parameter(init=np.asarray(2, dtype=np.float32))
    pa64 = C.parameter(init=np.asarray(2, dtype=np.float64))
    pl = C.placeholder(shape=(2))
    c = C.constant(value=3.0)
    n32 = AA(1, dtype=np.float32)
    n64 = AA(1, dtype=np.float64)

    assert get_data_type(pa32) == np.float32
    assert get_data_type(pa32, n32) == np.float32
    assert get_data_type(n32, n32) == np.float32
    assert get_data_type(n32, n64) == np.float64
    assert get_data_type(pl, n64) == np.float64
    assert get_data_type(pl, n32) == np.float32
    assert get_data_type(pl, pl) is None
    # variable's type shall take precedence over provided data
    assert get_data_type(pa32, n64) == np.float32
    assert get_data_type(pa64, n64) == np.float64
    assert get_data_type(pa32, pl, n64) == np.float32
    assert get_data_type(pa64, pl, n64) == np.float64

    assert get_data_type(np.float64(1)) == np.float64
    assert get_data_type(np.float32(1)) == np.float32
    assert get_data_type(np.int64(1)) == np.float32  # special case for cntk
    assert get_data_type(1) == np.float32
    assert get_data_type(1.0) == np.float32
def test_clone_with_slice():
    i1 = C.input_variable((2, 2), name='i1')
    i2 = C.input_variable((2, 2), name='i2')
    x = C.splice(i1, i2, axis=0)
    W = C.constant(1, (4, 1), name='W')
    y = C.convolution(W, x)
    assert(y.shape == (4, 2))

    from ..functions import CloneMethod
    x1 = C.input_variable((2, 1), name='x1')
    x2 = C.input_variable((2, 1), name='x2')
    p1 = C.placeholder()
    p2 = C.placeholder()

    y_cloned = y.clone('clone', {i1: p1, i2: p2})
    y2 = y_cloned(x1, x2)
    assert(y2.shape == (4, 1))
def test_op_sequence_reduce_sum(device_id, precision):
    a = C.sequence.input_variable(shape=(1,),
                                  dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]),
                                  needs_gradient=True,
                                  name='a')

    sequence_sum_a_plus_sequence_sum_a = C.sequence.reduce_sum(a) + C.sequence.reduce_sum(a)

    a_data = [AA([[2]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2], [3]], dtype=PRECISION_TO_TYPE[precision]),
              AA([[2], [3], [4]], dtype=PRECISION_TO_TYPE[precision])]

    actual_grad = sequence_sum_a_plus_sequence_sum_a.grad({a: a_data}, [a])

    assert np.array_equal(actual_grad[0], np.asarray([[2.]]))
    assert np.array_equal(actual_grad[1], np.asarray([[2.], [2.]]))
    assert np.array_equal(actual_grad[2], np.asarray([[2.], [2.], [2.]]))

    res = sequence_sum_a_plus_sequence_sum_a.eval({a: a_data})
    assert np.array_equal(res[0], np.asarray([4.]))
    assert np.array_equal(res[1], np.asarray([10.]))
    assert np.array_equal(res[2], np.asarray([18.]))

    # Verify that calling sequence reduction on a placeholder with known
    # shape but unknown dynamic axes does not result in a problem
    p = C.placeholder(shape=(1,))
    r = C.sequence.reduce_sum(p)
    r.replace_placeholder(a)

    res = r.eval({a: a_data})
    assert np.array_equal(res[0], np.asarray([2.]))
    assert np.array_equal(res[1], np.asarray([5.]))
    assert np.array_equal(res[2], np.asarray([9.]))
def test_block_with_unused_outputs():
    p1 = C.placeholder()
    p3 = C.placeholder()
    func1 = C.as_block(p1 + 1, [(p1, p3)], 'plus_func_1')
    p2 = C.placeholder()
    p4 = C.placeholder()
    func2 = C.as_block(p2 + 1, [(p2, p4)], 'plus_func_2')
    p5 = C.placeholder()
    func3 = C.as_block(C.combine([func2]), [(p4, p5)], 'empty_block')
    input_var1 = C.input_variable(shape=())
    input_var2 = C.input_variable(shape=())
    block = C.as_block(C.combine([func1, func3]), [(p3, input_var1), (p5, input_var2)], 'multi_output_block')

    eval_root = C.combine([block.outputs[0]])
    result = eval_root.eval({input_var1: np.asarray([3], dtype=np.float32),
                             input_var2: np.asarray([-3], dtype=np.float32)})
    assert np.array_equal(result, [4.])
def test_sequence_unpack_basic(device_id):
    dev = cntk_device(device_id)

    # Unpack a placeholder
    p = C.placeholder()
    p_unpacked_outputs = C.sequence.unpack(p, padding_value=0).outputs
    assert len(p_unpacked_outputs) == 2

    x = C.input_variable((C.FreeDimension, 2, 3), is_sparse=False)
    x_seq_lens = C.input_variable(())
    x_seq = C.to_sequence(x, x_seq_lens)
    x_seq_unpacked = C.sequence.unpack(x_seq, padding_value=-1000.0)
    x_seq_unpacked_value_output = x_seq_unpacked.outputs[0]
    x_seq_unpacked_mask_output = x_seq_unpacked.outputs[1]
    assert len(x_seq_unpacked_value_output.dynamic_axes) == 1
    assert x_seq_unpacked_value_output.shape == (C.FreeDimension, 2, 3)

    seq1_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]]
    seq2_data = [[0, 1, 1], [1, 1, 0]]
    x_data = [np.asarray(seq1_data, dtype=np.float32),
              np.asarray([seq2_data, [[-100.0, -100.0, -100.0], [-100.0, -100.0, -100.0]]], dtype=np.float32)]
    x_seq_lens_data = np.asarray([2, 1], dtype=np.float32)

    result = x_seq_unpacked.eval({x: x_data, x_seq_lens: x_seq_lens_data}, device=dev)
    value = result[x_seq_unpacked_value_output]
    mask = result[x_seq_unpacked_mask_output]
    assert np.array_equal(value[0], seq1_data)
    assert np.array_equal(value[1], [seq2_data, [[-1000.0, -1000.0, -1000.0], [-1000.0, -1000.0, -1000.0]]])
    assert np.array_equal(mask, [[1, 1], [1, 0]])
def matching_attention_layer(self, attention_context):
    att_context = C.placeholder(shape=(2*self.hidden_dim,))

    # matching layer
    matching_model = C.layers.AttentionModel(attention_dim=self.hidden_dim, name='attention_model')

    # gate weight
    Wg = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim))

    # gru
    att_gru = C.layers.GRU(self.hidden_dim)

    @C.Function
    def out_func1(att_input, enc_input):
        enc_input2 = enc_input

        @C.Function
        def bigru_with_match(dh, x):
            c_att = matching_model(att_input, dh)
            x = C.splice(x, c_att)
            x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
            return att_gru(dh, x)

        return C.splice(C.layers.Recurrence(bigru_with_match)(enc_input2),
                        C.layers.Recurrence(bigru_with_match, go_backwards=True)(enc_input2),
                        name="bigru_with_match")

    match_context = out_func1(att_context, att_context)

    return C.as_block(
        match_context,
        [(att_context, attention_context)],
        'matching_attention_layer',
        'matching_attention_layer')
def test_recurrence_with_udf_without_layers():
    name = "SimpleUdf"

    def udf(a):
        return C.user_function(SimpleUdf(a, name=name))

    # input variable and the data
    x = C.sequence.input_variable(needs_gradient=True, shape=(2,))
    x0 = np.reshape(np.arange(16.0, dtype=np.float32), (2, 4, 2))
    print(x0)

    # creates a recurrent loop
    p = C.placeholder(shape=(2,))
    past = C.sequence.past_value(p)
    z = udf(x) * udf(past) + C.Parameter((2,), init=[1, 1])
    z.replace_placeholders({p: z.outputs[0]})
    #C.logging.graph.plot(z, "recurrent.pdf")

    out = z.eval({x: x0})
    print(out)
    expected_out = [np.array([1, 1, 3, 4, 13, 21, 79, 148], dtype=np.float32).reshape(4, 2),
                    np.array([1, 1, 11, 12, 133, 157, 1863, 2356], dtype=np.float32).reshape(4, 2)]
    assert np.array_equal(out, expected_out)

    gradient, result = z.grad({x: x0}, wrt=[x], outputs=[z.output])
    print(result)
    assert np.array_equal(result, expected_out)

    expected_grad = [np.array([0, 0, 29, 41, 21, 32, 13, 21], dtype=np.float32).reshape(4, 2),
                     np.array([0, 0, 181, 209, 165, 192, 133, 157], dtype=np.float32).reshape(4, 2)]
    print(gradient)
    assert np.array_equal(gradient, expected_grad)
def BinaryConvolution(operand,
                      filter_shape,
                      num_filters=1,
                      channels=1,
                      init=C.glorot_uniform(),
                      pad=False,
                      strides=1,
                      bias=True,
                      init_bias=0,
                      op_name='BinaryConvolution',
                      name=''):
    """
    arguments:
        operand: tensor to convolve
        filter_shape: tuple indicating filter size
        num_filters: number of filters to use
        channels: number of incoming channels
        init: type of initialization to use for weights
    """
    kernel_shape = (num_filters, channels) + filter_shape
    W = C.parameter(shape=kernel_shape, init=init, name="filter")

    binary_convolve_operand_p = C.placeholder(operand.shape, operand.dynamic_axes, name="operand")
    binary_convolve = C.convolution(CustomMultibit(W, 1), CustomMultibit(binary_convolve_operand_p, 1),
                                    auto_padding=[False, pad, pad], strides=[strides])
    r = C.as_block(binary_convolve, [(binary_convolve_operand_p, operand)], 'binary_convolve')

    bias_shape = (num_filters, 1, 1)
    b = C.parameter(shape=bias_shape, init=init_bias, name="bias")
    r = r + b

    # apply learnable param relu
    P = C.parameter(shape=r.shape, init=init, name="prelu")
    r = C.param_relu(P, r)
    return r
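# A minimal usage sketch (an assumption, not part of the original source): applying the
# BinaryConvolution block above to a single-channel 28x28 image. The input shape, filter
# size, and filter count below are hypothetical, and CustomMultibit is assumed to be the
# user-defined binarization function imported by the surrounding module.
img = C.input_variable((1, 28, 28), name='img')
feat = BinaryConvolution(img, filter_shape=(3, 3), num_filters=32, channels=1, pad=True)
# 'feat' can then be fed into further layers, e.g. C.layers.Dense(10)(feat)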
def test_recurrence_shape_inference():
    i = C.sequence.input_variable((2,))
    p = C.placeholder()
    p_past = C.sequence.past_value(p)
    p_past_plus_i = p_past + i

    p_past_plus_i.replace_placeholder(p_past_plus_i.output)
    assert p_past_plus_i.output.shape == (2,)
def returnFunction():
    left_val = [[10, 2]]
    right_val = [[2], [3]]

    p = placeholder(shape=(1, 2))
    op = times(p, right_val)
    c = constant(left_val)

    return op.replace_placeholders({p: c})
def create_model():
    x = C.placeholder()
    with C.layers.default_options(initial_state=0.1):
        e = C.layers.Embedding(emb_dim, name='embed')(x)
        negRnn = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=True)(e)
        posRnn = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(e)
        h = C.splice(posRnn, negRnn)
        out = C.layers.Dense(num_labels, name='classify')(h)
    return out
def convolution(operand):
    bcv_operand_p = C.placeholder(
        operand.shape, operand.dynamic_axes, name="operand")

    bcv = C.convolution(
        CustomMultibit(W, 1),
        CustomMultibit(bcv_operand_p, 1),
        auto_padding=[False, pad, pad],
        strides=[strides])

    return C.as_block(bcv, [(bcv_operand_p, operand)], name)
def test_clone_with_function_in_substitution_map():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,))
    w = C.parameter((input_dim, proj_dim))
    t = C.times(x, w)
    b = C.parameter((proj_dim))
    t_plus_b = t + b
    p = C.placeholder()
    just_b = t_plus_b.clone('clone', {t: p})
    t_plus_b_clone = just_b.clone('share', {p: t})
def test_ext_eval_7_placeholder():
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    pl = C.placeholder()
    m = C.user_function(MyPlus(pl, C.constant(3)))
    z = m + p
    z.replace_placeholder(i)

    input_data = np.random.rand(dim)
    result = z.eval([input_data])
    assert np.allclose(result[0][0], input_data + 3 + 10)
def test_outputs():
    fwd_state = C.placeholder("placeholder")
    prev_state = C.sequence.past_value(fwd_state, name="prev_state")
    z = C.abs(prev_state, "abs")
    output = z.output
    z = z.replace_placeholders({fwd_state: z.output})
    fwd_state = None
    prev_state = None
    z = None

    for arg in output.owner.arguments:
        print("Argument name: {}, argument owner name {}".format(arg.name, arg.owner.name))
def test_replace_save_restoreinplace_constant(tmpdir):
    from cntk import placeholder
    c1 = C.constant(value=0)
    c2 = C.constant(value=0)
    c3 = C.constant(value=0)
    p1 = placeholder(name="placeholder1")
    p2 = placeholder(name="placeholder2")
    result = (c1 * p1) * c2 + c3 + p2

    p3 = placeholder(name="placeholder3")
    p4 = placeholder(name="placeholder4")
    block = C.ops.as_block(result, [(p2, p4), (p1, p3)], "test_block")

    arg_map = {p3: C.constant(value=0)}
    block.replace_placeholders(arg_map)

    model_filename = str(tmpdir / 'simple_block.mod')
    block.save(model_filename)
    block.restore(model_filename)
    assert len(block.placeholders) == 1
def test_free_static_axis_in_recurrence():
    x = C.sequence.input_variable((C.FreeDimension, 2))
    out_placeholder = C.placeholder()
    out_past = C.sequence.past_value(out_placeholder)
    wh = C.parameter(init=np.asarray([[2, 5], [1, 3]], dtype=np.float32))
    wx = C.parameter(init=np.asarray([[1, 4], [2, 5]], dtype=np.float32))
    out = C.times(x, wx) + C.times(out_past, wh)
    out.replace_placeholders({out_placeholder: out})

    x_data = np.asarray([[0.5, 0.2], [-0.7, 1.2]], np.float32)
    w_grad, out_val = out.grad({x: x_data}, wrt=[wh, wx], outputs=[out])
    assert np.allclose(out_val, [[[[0.9, 3.], [1.7, 3.2]]]])
    assert np.allclose(w_grad[wx], [[-0.2, -0.2], [1.4, 1.4]])
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis).
    # Note Python axis order and BrainScript are reversed.
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha/(2*n+1), (1, 2*n+1, 1, 1), name='W')
    # 3D convolution with a filter that has a non 1-size only in the 3rd axis,
    # and does not reduce since the reduction dimension is fake and 1
    y = C.convolution(W, x2s)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)
    return apply_x
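# A minimal usage sketch (hypothetical values, not from the original source): the function
# above returns a placeholder-rooted CNTK Function, so it can be applied to a real tensor
# by calling it, which binds the 'lrn_arg' placeholder to that tensor.
conv_out = C.input_variable((32, 15, 15), name='conv_out')   # hypothetical feature map
lrn = LocalResponseNormalization(k=1.0, n=2, alpha=1e-4, beta=0.75)
normalized = lrn(conv_out)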
def test_replace_placeholder_s():
    left_val = [[10, 2]]
    right_val = [[2], [3]]

    p = C.placeholder(shape=(1, 2))
    c = C.constant(left_val)

    op = C.times(p, right_val)
    op.replace_placeholders({p: c})
    assert op.eval() == 26

    op = C.times(p, right_val)
    op.replace_placeholder(c)
    assert op.eval() == 26
def test_squeeze(operand_shape, axis, device_id, precision):
    operand = np.arange(np.prod(operand_shape)).reshape(operand_shape).astype('f')
    expected = np.squeeze(operand, axis)

    expected_forward = [expected]
    expected_backward = {
        'arg': [np.ones_like(operand)],
    }

    from .. import squeeze, placeholder
    p = C.placeholder()
    squeeze_with_axis = C.squeeze(p, axis)
    _test_unary_op(precision, device_id, squeeze_with_axis, operand,
                   expected_forward, expected_backward)
def BNBiRecurrence(fwd, bwd, test_dual=True):
    # special version that calls one shared BN instance at two places, for testing BN param tying
    F = Recurrence(fwd)
    G = Recurrence(fwd, go_backwards=True)
    BN = BatchNormalization(normalization_time_constant=-1)
    x = placeholder()
    # The following code applies the same BN function object twice.
    # When running whole-corpus estimation of means/vars, this must lead to the same estimate
    # although it is estimated on twice the amount of data (each sample is used twice).
    # Hence, this is the test that proves that the parameter sharing works.
    x1 = BN(x)
    x2 = BN(x) if test_dual else x1
    # In double precision with corpus aggregation, these lead to the same result.
    apply_x = splice(F(x1), G(x2))
    return apply_x
def test_expand_dims(operand_shape, axis, device_id, precision):
    if axis is None or isinstance(axis, tuple):
        return

    operand = np.arange(np.prod(operand_shape)).reshape(operand_shape).astype('f')
    expected = np.expand_dims(operand, axis)

    expected_forward = [expected]
    expected_backward = {
        'arg': [np.ones_like(operand)],
    }

    from .. import expand_dims, placeholder
    p = C.placeholder()
    expand_dims_with_axis = C.expand_dims(p, axis)
    _test_unary_op(precision, device_id, expand_dims_with_axis, operand,
                   expected_forward, expected_backward)
def test_op_as_block(input_shape, output_shape, expected_output_shape, device_id, precision): # We test using reshape as the operation that is encapsulated in a block dev = cntk_device(device_id) from cntk.internal import sanitize_dtype_cntk from .. import reshape, element_times, as_block num_tensor_elements = np.multiply.reduce(input_shape) input_tensor = np.arange(num_tensor_elements, dtype=PRECISION_TO_TYPE[precision]).reshape(input_shape) input_reshaped = input_tensor.reshape(expected_output_shape) a_placeholder = C.placeholder(); a_reshaped = reshape(a_placeholder, output_shape) const_input_reshaped = constant(input_reshaped, device=dev) block_composite = element_times(a_reshaped, const_input_reshaped, name='element_times_inside_block') a = C.input_variable(shape=input_tensor.shape, dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]), needs_gradient=True, name='a') input_op = as_block(block_composite, [(a_placeholder, a)], 'reshape_test_op', block_instance_name='reshape_test_op') # Test some basic methods related to blocks assert input_op.is_composite block_primitive = input_op.root_function.find_by_name('reshape_test_op') assert block_primitive.name == 'reshape_test_op' assert block_primitive.is_primitive assert block_primitive.is_block element_times_inside_block = block_primitive.block_root.find_by_name('element_times_inside_block') assert element_times_inside_block.name == 'element_times_inside_block' assert element_times_inside_block.is_primitive block_arguments_map = block_primitive.block_arguments_mapping assert len(block_arguments_map) == 1 expected_forward = [input_reshaped**2] expected_backward = {a: input_tensor} # create batch input_tensor.shape = (1,) + input_tensor.shape forward_input = {a: input_tensor} unittest_helper(input_op, forward_input, expected_forward, expected_backward, device_id=device_id, precision=precision)
def create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, input_features, freeze=False):
    # Load the pretrained classification net and find nodes
    base_model = load_model(base_model_file)
    feature_node = find_by_name(base_model, feature_node_name)
    last_node = find_by_name(base_model, last_hidden_node_name)

    # Clone the desired layers with fixed weights
    cloned_layers = combine([last_node.owner]).clone(
        CloneMethod.freeze if freeze else CloneMethod.clone,
        {feature_node: placeholder(name='features')})

    # Add new dense layer for class prediction
    feat_norm = input_features - Constant(114)
    cloned_out = cloned_layers(feat_norm)
    z = Dense(num_classes, activation=None, name=new_output_node_name)(cloned_out)

    return z
def create_trainer(use_sparse, device):
    a = C.sequence.input_variable(shape=input_shape, is_sparse=use_sparse, name='input')
    w_i = C.parameter(init=w_init_i, device=dev)
    a_projection = times(a, w_i)

    p_o = C.placeholder()
    h = C.sequence.past_value(p_o)
    w_h = C.parameter(init=w_init_h, device=dev)
    h_projection = times(h, w_h)

    z = a_projection + h_projection
    z = z.replace_placeholder(z)
    z = reshape(z, label_shape)

    l = C.sequence.input_variable(shape=label_shape, is_sparse=use_sparse, name='label')
    loss = cross_entropy_with_softmax(z, l, axis=-1)

    trainer = C.Trainer(z, (loss, None),
                        C.sgd(z.parameters, lr=C.learning_rate_schedule(0.7, C.UnitType.sample)))
    return (a, l, w_i, w_h, trainer)
def test_op_broadcast_as_in_loop(device_id):
    a_data = [AA([1]), AA([2]), AA([3])]
    b_data = [AA([[2]]), AA([[2], [3]]), AA([[2], [3], [4]])]

    a = C.input_variable(shape=(1,), name='a')
    b = C.sequence.input_variable(shape=(1,), name='b')

    out_placeholder = C.placeholder()
    out_delayed = C.sequence.past_value(out_placeholder, time_step=5)
    out_delayed_plus_b = out_delayed + b
    out = C.sequence.broadcast_as(a, out_delayed_plus_b)
    out.replace_placeholder(out)

    res = out.eval({a: a_data, b: b_data})
    assert np.array_equal(res[0], np.asarray([[1.]]))
    assert np.array_equal(res[1], np.asarray([[2.], [2.]]))
    assert np.array_equal(res[2], np.asarray([[3.], [3.], [3.]]))
def ForwardDeclaration(name='forward_declaration'): ''' Helper for recurrent network declarations. Returns a placeholder variable with an added method ``resolve_to()`` to be called at the end to close the loop. This is used for explicit graph building with recurrent connections. Example: >>> # create a graph with a recurrent loop to compute the length of an input sequence >>> from cntk.layers.typing import * >>> x = C.input_variable(**Sequence[Tensor[2]]) >>> ones_like_input = C.sequence.broadcast_as(1, x) # sequence of scalar ones of same length as input >>> out_fwd = ForwardDeclaration() # placeholder for the state variables >>> out = C.sequence.past_value(out_fwd, initial_state=0) + ones_like_input >>> out_fwd.resolve_to(out) >>> length = C.sequence.last(out) >>> x0 = np.reshape(np.arange(6,dtype=np.float32),(1,3,2)) >>> x0 array([[[ 0., 1.], [ 2., 3.], [ 4., 5.]]], dtype=float32) >>> length(x0) array([ 3.], dtype=float32) Returns: :class:`~cntk.variables.Variable`: a placeholder variable with a method ``resolve_to()`` that resolves it to another variable ''' var_fwd = placeholder(name=name) def resolve_to(var): #from cntk import cntk_py #if isinstance(var, cntk_py.Function): # var.replace_placeholders({var_fwd: var.output}) # resolves var_fwd := var #else: # TODO: ^^ should no longer be needed; delete once confirmed var.owner.replace_placeholders({var_fwd: var}) # resolves var_fwd := var var_fwd.resolve_to = resolve_to return var_fwd
def func(x_var):
    x = C.placeholder()
    WT = C.Parameter((dim, dim,), init=transform_weight_initializer, name=name + '_WT')
    bT = C.Parameter(dim, init=transform_bias_initializer, name=name + '_bT')
    WU = C.Parameter((dim, dim,), init=update_weight_initializer, name=name + '_WU')
    bU = C.parameter(dim, init=update_bias_initializer, name=name + '_bU')
    transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
    update = C.tanh(C.times(x, WU, name=name + '_U') + bU)
    return C.as_block(
        update * transform_gate + (1 - transform_gate) * x,
        [(x, x_var)],
        'SingleInner',
        'SingleInner' + name)
def func(x_var):
    x = C.placeholder()
    WT = C.Parameter((dim, dim,), init=transform_weight_initializer, name=name + '_WT')
    bT = C.Parameter(dim, init=transform_bias_initializer, name=name + '_bT')
    WU = C.Parameter((dim, dim,), init=update_weight_initializer, name=name + '_WU')
    bU = C.Parameter(dim, init=update_bias_initializer, name=name + '_bU')
    transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
    update = C.relu(C.times(x, WU, name=name + '_U') + bU)
    return C.as_block(
        x + transform_gate * (update - x),
        [(x, x_var)],
        'HighwayBlock',
        'HighwayBlock' + name)
def test_topk_backward(device_id, precision): def check_grad_last_axis(input, root, indices, output): d = input.shape[-1] k = indices.shape[-1] expected_output = np.zeros_like(input).reshape(-1,d) ind = np.reshape(indices, (-1,k)) r = np.reshape(root,(-1,k)) assert ind.shape[0] == r.shape[0] == expected_output.shape[0] for i in range(expected_output.shape[0]): for j in range(k): expected_output[i,int(ind[i,j])] = r[i,j] expected_output = expected_output.reshape(input.shape) assert np.allclose(output, expected_output) dt = PRECISION_TO_TYPE[precision] dev = cntk_device(device_id) axis=-1 h = C.placeholder() p = C.parameter((4, 5, 6)) p.value = p.value + np.random.randn(*p.shape) y = C.top_k(h, 3, axis=axis) y.replace_placeholder(p) dy, top = y.forward({}, y.outputs, set([y.outputs[0]])) indices = top[y.outputs[1]] root = np.ones_like(indices) root = root + np.arange(np.prod(root.shape)).reshape(*root.shape) cg = y.backward(dy, {y.outputs[0]:root}, set([p]))[p] check_grad_last_axis(p.value, root, indices, cg) q = C.sequence.input_variable((5,6), needs_gradient=True) q0 = [np.random.randn(4-i,5,6).astype(dt) for i in range(2)] y = C.top_k(q, 3, axis=axis) dy, top = y.forward({q:q0}, y.outputs, set([y.outputs[0]]), device=dev) indices = top[y.outputs[1]] root = [np.ones_like(i) + 100 * k + np.arange(np.prod(i.shape)).reshape(*i.shape) for k,i in enumerate(indices)] cg = y.backward(dy, {y.outputs[0]:root}, set([q]))[q] for i in range(2): check_grad_last_axis(q0[i], root[i], indices[i], cg[i])
def create_network():
    input_var = cntk.sequence.input_variable((num_channels, frame_height, frame_width), name='input_var')
    target_var = cntk.input_variable((num_classes,), is_sparse=True, name='target_var')

    with cntk.layers.default_options(enable_self_stabilization=True):
        model = Sequential([
            resnet_model(cntk.placeholder()), Label('resnet'),
            Dense(hidden_dim, name='cnn_fc'),
            cntk.layers.Stabilizer(),
            bidirectional_recurrence(LSTM(hidden_dim // 2), LSTM(hidden_dim // 2)),
            cntk.sequence.last,
            BatchNormalization(),
            Dense(num_classes)
        ])(input_var)

    return {
        'input': input_var,
        'target': target_var,
        'model': model,
        'loss': cntk.cross_entropy_with_softmax(model, target_var),
        'metric': cntk.classification_error(model, target_var)
    }
def test_placeholder(device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    import cntk.random as cr
    p = C.placeholder()
    u = cr.uniform_like(p)
    x = C.sequence.input_variable((4, 5))
    x1 = np.ones((2, 3, 4, 5), dtype=dt)
    f = u + p
    f.replace_placeholders({p: x})
    fx0, fx1 = f.eval({x: x1})
    assert fx0.shape == (3, 4, 5)
    assert fx1.shape == (3, 4, 5)
    assert fx0.min() >= 1
    assert fx0.max() < 2
    assert fx1.min() >= 1
    assert fx1.max() < 2
def test_cloning():
    p = C.placeholder(shape=(1,), name='p')
    i = C.input_variable(shape=(1,), needs_gradient=True, name='i')
    res = p + i

    with pytest.raises(ValueError):
        res.clone(2)

    from ..functions import CloneMethod

    # Test freeze
    cloned = res.clone(CloneMethod.freeze)
    assert cloned.inputs[0].name == 'p'
    assert cloned.inputs[0].uid != p.uid
    assert cloned.inputs[1].name == 'i'
    assert cloned.inputs[1].uid != i.uid

    cloned = res.clone('freeze')
    assert cloned.inputs[0].name == 'p'
    assert cloned.inputs[0].uid != p.uid
    assert cloned.inputs[1].name == 'i'
    assert cloned.inputs[1].uid != i.uid
def test_sequence_unpack_basic(device_id): dev = cntk_device(device_id) # Unpack a placeholder p = C.placeholder() p_unpacked_outputs = C.sequence.unpack(p, padding_value=0).outputs assert len(p_unpacked_outputs) == 2 x = C.input((C.FreeDimension, 2, 3), is_sparse=False) x_seq_lens = C.input(()) x_seq = C.to_sequence(x, x_seq_lens) x_seq_unpacked = C.sequence.unpack(x_seq, padding_value=-1000.0) x_seq_unpacked_value_output = x_seq_unpacked.outputs[0] x_seq_unpacked_mask_output = x_seq_unpacked.outputs[1] assert len(x_seq_unpacked_value_output.dynamic_axes) == 1 assert x_seq_unpacked_value_output.shape == (C.FreeDimension, 2, 3) seq1_data = [[[0, 1, 1], [0, 1, 0]], [[1, 0, 0], [1, 0, 1]]] seq2_data = [[0, 1, 1], [1, 1, 0]] x_data = [ np.asarray(seq1_data, dtype=np.float32), np.asarray( [seq2_data, [[-100.0, -100.0, -100.0], [-100.0, -100.0, -100.0]]], dtype=np.float32) ] x_seq_lens_data = np.asarray([2, 1], dtype=np.float32) result = x_seq_unpacked.eval({ x: x_data, x_seq_lens: x_seq_lens_data }, device=dev) value = result[x_seq_unpacked_value_output] mask = result[x_seq_unpacked_mask_output] assert np.array_equal(value[0], seq1_data) assert np.array_equal(value[1], [ seq2_data, [[-1000.0, -1000.0, -1000.0], [-1000.0, -1000.0, -1000.0]] ]) assert np.array_equal(mask, [[1, 1], [1, 0]])
def test_op_sequence_reduce_sum(device_id, precision): from .. import sequence a = sequence.input(shape=(1, ), dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]), needs_gradient=True, name='a') sequence_sum_a_plus_sequence_sum_a = sequence.reduce_sum( a) + sequence.reduce_sum(a) a_data = [ AA([[2]], dtype=PRECISION_TO_TYPE[precision]), AA([[2], [3]], dtype=PRECISION_TO_TYPE[precision]), AA([[2], [3], [4]], dtype=PRECISION_TO_TYPE[precision]) ] actual_grad = sequence_sum_a_plus_sequence_sum_a.grad({a: a_data}, [a]) assert np.array_equal(actual_grad[0], np.asarray([[2.]])) assert np.array_equal(actual_grad[1], np.asarray([[2.], [2.]])) assert np.array_equal(actual_grad[2], np.asarray([[2.], [2.], [2.]])) res = sequence_sum_a_plus_sequence_sum_a.eval({a: a_data}) assert np.array_equal(res[0], np.asarray([4.])) assert np.array_equal(res[1], np.asarray([10.])) assert np.array_equal(res[2], np.asarray([18.])) # Verify that calling sequence reduction on a placeholder with known # shape but unknown dynamic axes does not result in a problem p = C.placeholder(shape=(1, )) r = sequence.reduce_sum(p) r.replace_placeholder(a) res = r.eval({a: a_data}) assert np.array_equal(res[0], np.asarray([2.])) assert np.array_equal(res[1], np.asarray([5.])) assert np.array_equal(res[2], np.asarray([9.]))
def gpt2_block(token_dims: int, head_dims: int, as_block: bool = False, name: str = 'gpt2_block'):
    X = C.placeholder(token_dims,
                      dynamic_axes=(C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()),
                      name=name)

    sa_layer = gpt2_self_attention(token_dims, head_dims)
    ff_layer = feed_forward_layer(4 * token_dims, token_dims)

    sa = sa_layer(layer_normalization(X))
    sa = X + sa

    ff = ff_layer(layer_normalization(sa))
    ff = X + ff

    result = ff
    if as_block:
        return C.as_block(result, [(X, X)], 'gpt2_block', 'gpt2_block')

    return result
def BiRecurrence(fwd, bwd):
    F = Recurrence(fwd)
    G = Recurrence(bwd, go_backwards=True)  # run the backward cell over the reversed sequence
    x = placeholder()
    apply_x = splice(F(x), G(x))
    return apply_x
def sample(self, batchSize): z = self.prior.sample(batchSize).astype(np.float32) logp = C.log(self.prior.pdf(z)) x = self.reverse(z) return x def parameters(self): return self.forward.parameters if __name__ == '__main__': nets = lambda: C.layers.Sequential([ C.layers.Dense(256, activation=C.leaky_relu), C.layers.Dense(256, activation=C.leaky_relu), C.layers.Dense(2, activation=C.tanh) ])(C.placeholder(2)) nett = lambda: C.layers.Sequential([ C.layers.Dense(256, activation=C.leaky_relu), C.layers.Dense(256, activation=C.leaky_relu), C.layers.Dense(2) ])(C.placeholder(2)) masks = C.Constant(np.array([[0, 1], [1, 0]] * 3).astype(np.float32), name='mask') prior = MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.]) flow = RealNVP(nets, nett, masks, prior) loss = -C.reduce_mean(flow.log_prob) learner = C.adam(loss.parameters, C.learning_parameter_schedule(1e-1), C.momentum_schedule(0.9)) trainer = C.Trainer(flow.forward, (loss, None), learner)
def rnet_output_layer(self, attention_context, query): att_context = C.placeholder(shape=(2 * self.hidden_dim, )) q_processed = C.placeholder(shape=(2 * self.hidden_dim, )) wuq = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) whp = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) wha = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) v = C.parameter(shape=(2 * self.hidden_dim, 1), init=C.glorot_uniform()) bias = C.parameter(shape=(2 * self.hidden_dim), init=C.glorot_uniform()) whp_end = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) wha_end = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) v_end = C.parameter(shape=(2 * self.hidden_dim, 1), init=C.glorot_uniform()) # sequence[tensor[1]] q_len x 1 s0 = C.times(C.tanh(C.times(q_processed, wuq) + bias), v) a0 = C.sequence.softmax(s0) rQ = C.sequence.reduce_sum(a0 * q_processed) # sequence[tensor[1]] plen x 1 ts = C.reshape( C.times( C.tanh( C.times(att_context, whp) + C.times(C.sequence.broadcast_as(rQ, att_context), wha)), v), (-1)) # sequence[tensor[1]] ta = C.sequence.softmax(ts) # sequence[2d] 1 x 2d c0 = C.reshape(C.sequence.reduce_sum(ta * att_context), (2 * self.hidden_dim)) # sequence[tensor[2d]] ha1 = C.layers.blocks.GRU(2 * self.hidden_dim)(rQ, c0) # sequence[tensor[1]] plen x 1 s1 = C.reshape( C.times( C.tanh( C.times(att_context, whp_end) + C.times(C.sequence.broadcast_as(ha1, att_context), wha_end) ), v_end), (-1)) # sequence[tensor[1]] plen x 1 a1 = C.sequence.softmax(s1) return C.as_block(C.combine([ts, s1]), [(att_context, attention_context), (q_processed, query)], 'output_layer', 'output_layer')
def attention_layer(self, context, query, layer): q_processed = C.placeholder(shape=(2 * self.hidden_dim, )) p_processed = C.placeholder(shape=(2 * self.hidden_dim, )) qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs wq = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) wp = C.parameter(shape=(2 * self.hidden_dim, 2 * self.hidden_dim), init=C.glorot_uniform()) wg = C.parameter(shape=(8 * self.hidden_dim, 8 * self.hidden_dim), init=C.glorot_uniform()) v = C.parameter(shape=(2 * self.hidden_dim, 1), init=C.glorot_uniform()) # seq[tensor[2d]] p_len x 2d wpt = C.reshape(C.times(p_processed, wp), (-1, 2 * self.hidden_dim)) # q_len x 2d wqt = C.reshape(C.times(qvw, wq), (-1, 2 * self.hidden_dim)) # seq[tensor[q_len]] S = C.reshape( C.times(C.tanh(C.sequence.broadcast_as(wqt, p_processed) + wpt), v), (-1)) qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, p_processed) # seq[tensor[q_len]] S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30)) # seq[tensor[q_len]] A = C.softmax(S, axis=0) # seq[tensor[2d]] swap_qvw = C.swapaxes(qvw) cq = C.reshape( C.reduce_sum(A * C.sequence.broadcast_as(swap_qvw, A), axis=1), (-1)) # seq[tensor[4d]] uc_concat = C.splice(p_processed, cq, p_processed * cq, cq * cq) # seq[tensor[4d]] gt = C.tanh(C.times(uc_concat, wg)) # seq[tensor[4d]] uc_concat_star = gt * uc_concat # seq[tensor[4d]] vp = C.layers.Sequential([ C.layers.Dropout(self.dropout), OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name=layer + '_attention_rnn') ])(uc_concat_star) return C.as_block(vp, [(p_processed, context), (q_processed, query)], 'attention_layer', 'attention_layer')
def OneWordLookahead():
    x = C.placeholder()
    apply_x = C.splice(x, C.sequence.future_value(x))
    return apply_x
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                               init=normal(scale=0.01), init_bias=0.0)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)  # 2(bg/fg) * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score,
                                 (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
                                 name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    rpn_losses = None
    if (add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}".format(
            cfg["MODEL"].FEATURE_STRIDE,
            "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
                                              rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
                                              clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
                                              positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
                                              negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
                                              param_str=proposal_layer_params))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                             [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                             'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                              [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                               (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                              'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
def BiRecurrence(fwd, bwd):
    F = C.layers.Recurrence(fwd)
    G = C.layers.Recurrence(bwd, go_backwards=True)
    x = C.placeholder()
    apply_x = C.splice(F(x), G(x))  # concatenate the tensors
    return apply_x
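# A minimal usage sketch (hypothetical dimensions, not from the original source): BiRecurrence
# returns a placeholder-rooted layer function, so it composes like any other CNTK layer. Here it
# is used inside a small sequence-tagging model with assumed sizes.
def birnn_tagger(num_labels=10, emb_dim=50, hidden_dim=150):
    return C.layers.Sequential([
        C.layers.Embedding(emb_dim),
        BiRecurrence(C.layers.LSTM(hidden_dim // 2), C.layers.LSTM(hidden_dim // 2)),
        C.layers.Dense(num_labels)
    ])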
def flow_forward(input_dim: int, act_func_pair: tuple = (None, None), batch_norm: bool = False): chunk = {} log_det_J = 0 chunk['input_dim'] = input_dim _ph = C.placeholder(input_dim, name='place_holder') _out = _ph if batch_norm: # _bn = C.layers.BatchNormalization(name='batch_norm')(_ph) # chunk['scale'] = _bn.parameters[0] # chunk['bias'] = _bn.parameters[1] chunk['mu'] = C.Constant(np.zeros(shape=input_dim)) chunk['var'] = C.Constant(np.ones(shape=input_dim)) _eps = C.Constant(1e-7) _mu = C.reduce_mean(_ph, axis=C.Axis.default_batch_axis()) _var = C.reduce_mean(C.square(_ph-_mu), axis=C.Axis.default_batch_axis()) chunk['muB'] = _mu chunk['varB'] = _var # _bn = (_ph-chunk['mu'])/C.sqrt(chunk['var']+_eps) _bn = C.sqrt(chunk['var']+_eps)*_ph + chunk['mu'] _ph = _bn log_det_J += -0.5*C.reduce_sum(C.log((_var+_eps))) # log_det_J += C.reduce_sum(C.log()) chunk['W_rot_mat'] = _W = C.parameter((input_dim, input_dim)) _W.value = random_rotation_matrix = special_ortho_group.rvs(input_dim) # _W.value = np.roll(np.eye(input_dim),input_dim//2,axis=0) _out = _ph@_W log_det_J += C.log(C.abs(C.det(_W))) # or # log_det_J += C.slogdet(_W)[1] _half_dim = input_dim//2 _x1 = _out[:_half_dim] _x2 = _out[_half_dim:] _log_s_func, _t_func = act_func_pair if _log_s_func is None: # basic network _log_s_func = C.layers.Sequential([ C.layers.Dense(256, C.leaky_relu), C.layers.Dense(256, C.leaky_relu), C.layers.Dense(_half_dim, C.tanh), ])#(C.placeholder(input_dim, name='place_holder')) if _t_func is None: # basic network _t_func = C.layers.Sequential([ C.layers.Dense(256, C.leaky_relu), C.layers.Dense(256, C.leaky_relu), C.layers.Dense(_half_dim), ])#(C.placeholder(input_dim, name='place_holder')) chunk['log_s_func'] = _log_s_func chunk['t_func'] = _t_func _log_s, _t = _log_s_func(_x2), _t_func(_x2) _s = C.exp(_log_s) _y1 = _s*_x1 + _t _y2 = _x2 _Y = C.splice(_y1, _y2) chunk['output'] = _Y log_det_J += C.reduce_sum(_log_s) return _Y, log_det_J, chunk
def create_criterion_function(model):
    labels = C.placeholder(name='labels')
    ce = C.cross_entropy_with_softmax(model, labels)
    errs = C.classification_error(model, labels)
    return C.combine([ce, errs])  # (features, labels) -> (loss, metric)
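# A minimal sketch (hypothetical model and sizes, not from the original source) of how the
# criterion above is typically bound: the 'labels' placeholder is replaced by a real input
# variable before the composite is handed to a Trainer.
x_var = C.sequence.input_variable(300, name='x_var')          # assumed feature input
label_var = C.sequence.input_variable(10, name='label_var')   # assumed one-hot labels
tagger = C.layers.Dense(10)(x_var)                            # stand-in for the real model
criterion = create_criterion_function(tagger)
criterion.replace_placeholders({criterion.placeholders[0]: label_var})
trainer = C.Trainer(tagger, (criterion.outputs[0], criterion.outputs[1]),
                    [C.sgd(tagger.parameters, C.learning_parameter_schedule(0.1))])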
def _convert_optimized_rnnstack(root_func, map_param_to_func): ''' Internal implementation that converts root_func that contains cudnn optimized_rnnstack to use non-cudnn functions, so it can be used in non-CUDA environment Args: root_func: a root function of a graph that contains optimized_rnnstacks map_param_to_func: a mapping of converted rnn functions for parameter sharing Returns: converted root_func on GEMM based implementation of rnn that can be used on CPU ''' # recursively convert for blocks in root_func blocks = C.logging.graph.depth_first_search( root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0) for i in range(len(blocks)): # search for blocks again in case block input/output has been modified blocks1 = C.logging.graph.depth_first_search( root_func, lambda x: type(x) == C.Function and x.root_function.is_block, depth=0) block = blocks1[ i] # assuming depth_first_search order to be stable, so use the old index on new search results block_root = C.as_composite(block.block_root) new_block_root = _convert_optimized_rnnstack(block_root, map_param_to_func) if new_block_root != block_root: block_arguments_mapping = dict(block.block_arguments_mapping) new_block_arguments_mapping = [] for arg, new_arg in zip(block_root.arguments, new_block_root.arguments): new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])] new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name) if all([x not in root_func.outputs for x in block.outputs]) or all( [x in block.outputs for x in root_func.outputs]): root_func = root_func.clone( C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs))) else: new_outputs = [ new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs ] root_func_nonreplaced = C.combine( [x for x in root_func.outputs if x not in block.outputs]) root_func_nonreplaced_clone = root_func_nonreplaced.clone( C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs))) idx = 0 for nonreplaced_output in root_func_nonreplaced_clone.outputs: while new_outputs[idx]: idx += 1 new_outputs[idx] = nonreplaced_output root_func = C.combine(new_outputs) # replace all optimized_rnnstack instances in root_func cudnn_rnns = C.logging.graph.depth_first_search( root_func, lambda x: type(x) == C.Function and x.root_function.op_name == 'OptimizedRNNStack', depth=0) for cudnn_rnn in cudnn_rnns: param = cudnn_rnn.parameters[0] if map_param_to_func[param]: #shared parameter, clone converted = map_param_to_func[param][0].clone( C.CloneMethod.share, { map_param_to_func[param][1]: cudnn_rnn.inputs[0], map_param_to_func[param][2]: C.placeholder() }) else: #unique or first parameter, convert converted = _from_optimized_rnnstack(cudnn_rnn) map_param_to_func[param] = ( converted, cudnn_rnn.inputs[0], cudnn_rnn.output, ) if not cudnn_rnn.output in root_func.outputs: root_func = root_func.clone(C.CloneMethod.share, {cudnn_rnn.output: converted.output}) else: # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed if len(root_func.outputs) > 1: root_func = C.combine([ converted if x == cudnn_rnn.output else x for x in root_func.outputs ]) else: root_func = converted return root_func
def create_transfer_learning_model(input, num_classes, model_file, freeze=False):
    base_model = load_model(model_file)
    base_model = C.as_composite(base_model[3].owner)

    # Load the pretrained classification net and find nodes
    feature_node = C.logging.find_by_name(base_model, feature_node_name)
    last_node = C.logging.find_by_name(base_model, last_hidden_node_name)

    base_model = C.combine([last_node.owner]).clone(
        C.CloneMethod.freeze if freeze else C.CloneMethod.clone,
        {feature_node: C.placeholder(name='features')})
    base_model = base_model(C.input_variable((num_channels, image_height, image_width)))

    r1 = C.logging.find_by_name(base_model, "z.x.x.r")
    r2_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.r")
    r3_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.x.x.r")
    r4_2 = C.logging.find_by_name(base_model, "z.x.x.x.x.x.x.x.x.r")

    up_r1 = OneByOneConvAndUpSample(r1, 3, num_classes)
    up_r2_2 = OneByOneConvAndUpSample(r2_2, 2, num_classes)
    up_r3_2 = OneByOneConvAndUpSample(r3_2, 1, num_classes)
    up_r4_2 = OneByOneConvAndUpSample(r4_2, 0, num_classes)

    merged = C.splice(up_r1, up_r3_2, up_r2_2, axis=0)

    resnet_fcn_out = Convolution((1, 1), num_classes, init=he_normal(), activation=sigmoid, pad=True)(merged)

    z = UpSampling2DPower(resnet_fcn_out, 2)

    return z
def with_lookahead():
    x = placeholder()
    future_x = sequence.future_value(x)
    apply_x = splice(x, future_x)
    return apply_x
def output_layer(self, embed, attention_context, model_context, aw, q_processed, c_processed, cw):
    cw_ph = C.placeholder()
    att_context = C.placeholder(shape=(8*self.hidden_dim,))
    query_processed = C.placeholder(shape=(2*self.hidden_dim,))
    context_processed = C.placeholder(shape=(2*self.hidden_dim,))
    mod_context = C.placeholder(shape=(2*self.hidden_dim))
    a_onehot = C.placeholder(shape=(self.vocab_size+1,))

    start_logits = C.layers.Dense(1, name='out_start')(C.dropout(C.splice(mod_context, att_context), self.dropout))
    start_hardmax = seq_hardmax(start_logits)
    att_mod_ctx = C.sequence.last(C.sequence.gather(mod_context, start_hardmax))
    att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
    end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded, mod_context * att_mod_ctx_expanded)
    m2 = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='output_rnn')(end_input)
    end_logits = C.layers.Dense(1, name='out_end')(C.dropout(C.splice(m2, att_context), self.dropout))

    start_flag = C.hardmax(start_logits)
    end_flag = C.hardmax(end_logits)

    def create_model():
        # Encoder: (input*) --> (h0, c0)
        # Create multiple layers of LSTMs by passing the output of the i-th layer
        # to the (i+1)th layer as its input
        with C.layers.default_options(enable_self_stabilization=True, go_backwards=False):
            LastRecurrence = C.layers.Recurrence
            encode = C.layers.Sequential([
                C.layers.Stabilizer(),
                OptimizedRnnStack(self.hidden_dim, return_full_state=True),
            ])
            encode_c = C.layers.Sequential([
                C.layers.Stabilizer(),
                OptimizedRnnStack(self.hidden_dim, return_full_state=True),
            ])

        # Decoder: (history*, input*) --> unnormalized_word_logp*
        # where history is one of these, delayed by 1 step and <s> prepended:
        #  - training: labels
        #  - testing: its own output hardmax(z) (greedy decoder)
        with C.layers.default_options(enable_self_stabilization=True):
            # sub-layers
            stab_in = C.layers.Stabilizer()
            rec_blocks = [C.layers.LSTM(self.hidden_dim) for i in range(self.num_layers)]
            stab_out = C.layers.Stabilizer()
            proj_out = C.layers.Dense(self.vocab_size+1, name='out_proj')
            # attention model :: (h_enc*, h_dec) -> (h_dec augmented)
            attention_model = C.layers.AttentionModel(self.attention_dim, name='attention_model')
            hstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
            cstate_dense = C.layers.Dense(self.hidden_dim, activation=C.tanh, input_rank=1)
            W_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
            U_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
            V_dense = C.layers.Dense(2*self.hidden_dim, input_rank=1)
            maxout = C.layers.MaxPooling((2,), strides=2)

            # layer function
            @C.Function
            def decode(history, q, c, start_logits, end_logits):
                q = encode(q)
                c = encode_c(C.splice(c, start_logits, end_logits, axis=0))
                r = history
                r = stab_in(r)
                q_last_h = C.sequence.last(q.outputs[0])
                q_last_c = C.sequence.last(q.outputs[1])
                c_last_h = C.sequence.last(c.outputs[0])
                c_last_c = C.sequence.last(c.outputs[1])
                initial_hstate = hstate_dense(C.splice(q_last_h, c_last_h))
                initial_cstate = cstate_dense(C.splice(q_last_c, c_last_c))

                rec_block = rec_blocks[0]   # LSTM(hidden_dim) :: (dh, dc, x) -> (h, c)

                @C.Function
                def find_embed(x):
                    gx, ngx = C.slice(x, 0, 0, self.wg_dim), C.slice(x, 0, self.wg_dim, self.vocab_size)
                    return embed(gx, ngx)

                @C.Function
                def lstm_with_attention(dh, dc, r, x):
                    history_embed = find_embed(x)
                    h_att = attention_model(c.outputs[0], dh)
                    q_att = attention_model(q.outputs[0], dh)
                    att = C.splice(h_att, q_att)
                    x = C.splice(x, att)
                    x, dc = rec_block(dh, dc, x).outputs
                    # 0*r is a hack because cntk freaks out when r is not used.
                    r = U_dense(att) + W_dense(history_embed) + V_dense(x) + 0*r
                    # bug when W_dense is added first
                    # r = W_dense(embed(gx, ngx)) + U_dense(att) + V_dense(x) + 0*r
                    return x, dc, r

                _, _, r = C.layers.RecurrenceFrom(lstm_with_attention, return_full_state=True)(
                    initial_hstate, initial_cstate, C.Constant(np.zeros(2*self.hidden_dim)), r).outputs

                r = maxout(r)
                r = stab_out(r)
                r = proj_out(r)
                # r = C.softmax(r)
                r = C.layers.Label('out_proj_out')(r)
                return r
        return decode

    def create_model_train(s2smodel):
        # model used in training (history is known from labels)
        # note: the labels must NOT contain the initial <s>
        @C.Function
        def model_train(labels, q, c, start_logits, end_logits):  # (input*, labels*) --> (word_logp*)
            # The input to the decoder always starts with the special label sequence start token.
            # Then, use the previous value of the label sequence (for training) or the output (for execution).
            past_labels = C.layers.Delay(initial_state=self.sentence_start)(labels)
            return s2smodel(past_labels, q, c, start_logits, end_logits)
        return model_train

    def create_model_greedy(s2smodel):
        # model used in (greedy) decoding / inference (history is decoder's own output)
        @C.Function
        def model_greedy(q, c, start_logits, end_logits):  # (input*) --> (word_sequence*)
            # Decoding is an unfold() operation starting from sentence_start.
            # We must transform s2smodel (history*, input* -> word_logp*) into a generator (history* -> output*)
            # which holds 'input' in its closure.
            unfold = C.layers.UnfoldFrom(
                lambda history: s2smodel(history, q, c, start_logits, end_logits) >> C.hardmax,
                # stop once sentence_end_index was max-scoring output
                until_predicate=lambda w: w[..., self.sentence_end_index],
                length_increase=self.sentence_max_length)
            return unfold(initial_state=self.sentence_start, dynamic_axes_like=c)
        return model_greedy

    s2smodel = create_model()

    model_train = create_model_train(s2smodel)(a_onehot, query_processed, context_processed, start_logits, end_logits)
    model_greed = create_model_greedy(s2smodel)(query_processed, context_processed, start_logits, end_logits)
    model_greedy = C.argmax(model_greed, 0)
    context = C.argmax(cw_ph, 0)

    return C.as_block(
        C.combine((model_train, model_greedy, start_logits, end_logits, context)),
        [(att_context, attention_context), (mod_context, model_context), (a_onehot, aw),
         (query_processed, q_processed), (context_processed, c_processed), (cw_ph, cw)],
        'attention_layer',
        'attention_layer')
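# The output layer above calls a seq_hardmax() helper that is not defined in this file. A minimal
# sketch of one plausible implementation is below: it produces a one-hot marker along the sequence
# axis at the single max-scoring step (an argmax over the sequence). This is an assumption about
# the helper's behaviour, not the original implementation.
import cntk as C

def seq_hardmax(logits):
    # fold a running element-wise maximum over the sequence down to the global maximum
    seq_max = C.layers.Fold(C.element_max, initial_state=C.constant(-1e30, logits.shape))(logits)
    # mark every step that equals the global maximum (ties give several 1s)
    is_max = C.equal(logits, C.sequence.broadcast_as(seq_max, logits))
    # keep only the first tied position so the result is one-hot along the sequence axis
    first_hit = C.equal(C.layers.Recurrence(C.plus)(is_max), 1)
    return is_max * first_hit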
def run_experiment_cntk():
    if os.path.isfile('x_train_imdb.bin'):
        print('Loading from .bin files')
        x_train, y_train, x_test, y_test = load_from_files(x_shape=(25000, 500), y_shape=(25000,))
    else:
        print('Loading data...')
        (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=Constants.max_words)
        print(len(x_train), 'train sequences')
        print(len(x_test), 'test sequences')
        print('Pad sequences (samples x time)')
        x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=Constants.maxlen)
        x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=Constants.maxlen)
        print('x_train shape:', x_train.shape)
        print('x_test shape:', x_test.shape)
        print('Saving to .bin files')
        save_to_files(x_train, y_train, x_test, y_test)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)

    x_placeholder = cntk.placeholder(shape=(), dynamic_axes=[
        cntk.Axis.default_batch_axis(),
        cntk.Axis.default_dynamic_axis()])

    model = cntk.one_hot(x_placeholder, num_classes=Constants.max_words, sparse_output=True)
    model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Recurrence(cntk.layers.LSTM(32))(model)
    model = cntk.sequence.last(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)
    model.save('ch6-2.cntk.model')
    model = None
    model = cntk.load_model('ch6-2.cntk.model')
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=cntk.Axis.all_static_axes())

    max_epochs = 10
    batch_size = 128
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.01),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)

    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
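# run_experiment_cntk() delegates the actual training loop to a cntk_train() helper that is not
# shown in this file. Below is a minimal sketch of such a loop, assuming plain numpy minibatches
# are fed straight to the Trainer; only the signature is taken from the call above, the body is
# illustrative.
import numpy as np

def cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator):
    num_samples = x_train.shape[0]
    for epoch in range(max_epochs):
        for start in range(0, num_samples, batch_size):
            # x: one 500-step sequence of word ids per sample, y: one scalar label per sample
            x_batch = x_train[start:start + batch_size].astype(np.float32)
            y_batch = y_train[start:start + batch_size].astype(np.float32)
            trainer.train_minibatch({x: x_batch, y: y_batch})
        trainer.summarize_training_progress()
        # held-out accuracy could be reported the same way, e.g.
        # evaluator.test_minibatch({x: x_val, y: y_val})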
def converter(self, cudnn_rnn):
    param = cudnn_rnn.parameters[0]
    if self.map_param_to_func[param]:
        # shared parameter, clone
        converted = self.map_param_to_func[param][0].clone(
            C.CloneMethod.share,
            {self.map_param_to_func[param][1]: cudnn_rnn.inputs[0],
             self.map_param_to_func[param][2]: C.placeholder()})
    else:
        # unique or first parameter, convert
        converted = _from_optimized_rnnstack(cudnn_rnn)
        self.map_param_to_func[param] = (converted, cudnn_rnn.inputs[0], cudnn_rnn.output,)
    return converted
def load_model(self):
    if self.__model:
        raise Exception("Model already loaded")

    trained_frcnn_model = load_model(self.__model_path)

    self.__is_python_model = len(trained_frcnn_model.arguments) < 3
    if self.__is_python_model:
        self.__args_indices = {"features": 0, "rois": 1}
        self.__nr_rois = trained_frcnn_model.arguments[self.__args_indices["rois"]].shape[0]
        self.__resize_width = trained_frcnn_model.arguments[self.__args_indices["features"]].shape[1]
        self.__resize_height = trained_frcnn_model.arguments[self.__args_indices["features"]].shape[2]
        self.labels_count = trained_frcnn_model.arguments[self.__args_indices["rois"]].shape[1]
        self.__model = trained_frcnn_model
    else:
        # cache indices of the model arguments
        args_indices = {}
        for i, arg in enumerate(trained_frcnn_model.arguments):
            args_indices[arg.name] = i

        self.__nr_rois = trained_frcnn_model.arguments[args_indices["rois"]].shape[0]
        self.__resize_width = trained_frcnn_model.arguments[args_indices["features"]].shape[1]
        self.__resize_height = trained_frcnn_model.arguments[args_indices["features"]].shape[2]
        self.labels_count = trained_frcnn_model.arguments[args_indices["roiLabels"]].shape[1]

        # next, we clone the model and create input nodes just for the features (image) and ROIs.
        # This ensures that only the computations needed for evaluating images are performed
        # at test time.
        #
        # find the original features and rois input nodes
        features_node = find_by_name(trained_frcnn_model, "features")
        rois_node = find_by_name(trained_frcnn_model, "rois")

        # find the output "z" node
        z_node = find_by_name(trained_frcnn_model, 'z')

        # define new input nodes for the features (image) and rois
        image_input = input_variable(features_node.shape, name='features')
        roi_input = input_variable(rois_node.shape, name='rois')

        # clone the desired layers with fixed weights and placeholders for the new input nodes
        cloned_nodes = combine([z_node.owner]).clone(
            CloneMethod.freeze,
            {features_node: placeholder(name='features'),
             rois_node: placeholder(name='rois')})

        # apply the cloned nodes to the input nodes to obtain the model for evaluation
        self.__model = cloned_nodes(image_input, roi_input)

        # cache the indices of the input nodes
        self.__args_indices = {}
        for i, arg in enumerate(self.__model.arguments):
            self.__args_indices[arg.name] = i
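# Hypothetical companion method for the detector class above (not part of the original source):
# once load_model() has populated self.__model and self.__args_indices, evaluation would feed a
# resized CHW image tensor and an ROI array through the cached argument indices. The method name,
# argument layout and preprocessing details are assumptions.
def evaluate(self, image_data, rois_data):
    # image_data: float32 array shaped like the 'features' argument (channels x height x width)
    # rois_data:  float32 array shaped like the 'rois' argument (nr_rois x 4, relative coordinates)
    args = self.__model.arguments
    feed = {
        args[self.__args_indices["features"]]: [image_data],
        args[self.__args_indices["rois"]]: [rois_data],
    }
    return self.__model.eval(feed)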