def multiFunc(self, arg1):
    """Build a multi-bit binary approximation of *arg1* with per-kernel means.

    self.bit_map gives, per weight, how many bits are used to binarize it;
    one binarization level is emitted per loop iteration.

    Returns:
        (approx, multiIn): the approximation node and the created input
        variable that mirrors *arg1*'s shape and dynamic axes.
    """
    # load or create the inputs we need
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()
    # NOTE: the original computed unused `shape` and `reformed` locals here;
    # they were dead code and have been removed.
    # carry over represents the remaining value that needs to be binarized.
    # For a single bit, this is just the input. For more bits, it is the
    # difference between the previous bits' approximation and the true value.
    carry_over = multiIn
    approx = C.element_times(multiIn, 0)
    # iterate through the maximum number of bits specified by the bit maps,
    # basically compute each level of binarization
    for i in range(max_bits):
        # determine which values of the input should be binarized to i bits or more
        hot_vals = C.greater(bit_map, i)
        # select only the values which we need to binarize
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        # compute mean on a per kernel basis, reshaping is done to allow for
        # sum reduction along only axis 0 (the kernels)
        mean = C.element_divide(
            C.reduce_sum(C.reshape(C.abs(valid_vals), (valid_vals.shape[0], -1)), axis=1),
            C.reduce_sum(C.reshape(hot_vals, (hot_vals.shape[0], -1)), axis=1))
        # reshape the mean to match the dimensionality of the input
        mean = C.reshape(mean, (mean.shape[0], mean.shape[1], 1, 1))
        # binarize the carry over: +1/-1 on active positions, 0 elsewhere
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        # add in the equivalent binary representation to the approximation
        approx = C.plus(approx, C.element_times(mean, bits))
        # compute the new carry over (subtract what was just approximated)
        carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
def output_layer(self, query, match_context):
    # Pointer-style output layer: produces start/end answer-span logits over
    # the matched context, smoothed by recurrences that average logits around
    # the selected point, then hard-maxed into one-hot span markers.
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))
    mat_context = C.placeholder(shape=(2*self.hidden_dim,))

    #output layer
    # question summary vector
    r_q = question_pooling(q_processed, 2*self.hidden_dim) #shape n*(2*self.hidden_dim)
    # start logits attend the context against the question summary
    p1_logits = attention_weight(mat_context, r_q, 2*self.hidden_dim)
    attention_pool = C.sequence.reduce_sum(p1_logits * mat_context)
    # one GRU step evolves the question state before scoring the end position
    state = C.layers.GRU(2*self.hidden_dim)(attention_pool, r_q)
    p2_logits = attention_weight(mat_context, state, 2*self.hidden_dim)

    @C.Function
    def start_ave_point(p1_logits, p2_logits, point):
        # Running average of start logits, gated by p2 values gathered at
        # `point`; `point` is advanced forward each step.
        @C.Function
        def start_ave(last, now):
            # `+ last - last` is arithmetically a no-op; presumably it keeps
            # `last` wired into the recurrence graph -- confirm before removing
            now = now + last - last
            new_start = now * C.sequence.gather(p2_logits, point)
            point = C.sequence.future_value(point)
            return new_start
        start_logits_ave = C.layers.Recurrence(start_ave)(p1_logits)
        return start_logits_ave

    # `point` marks a window near the sequence start: cumulative position
    # counted twice, then thresholded against the constant 16
    point = C.sequence.is_first(p1_logits)
    point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus))])(point)
    point = C.greater(C.constant(16), point)
    start_logits_ave = start_ave_point(p1_logits, p2_logits, point)

    @C.Function
    def end_ave_point(p1_logits, p2_logits, point):
        # Mirror of start_ave_point: runs backwards and moves `point` to the past.
        @C.Function
        def end_ave(last, now):
            # same apparent no-op trick as in start_ave -- see note above it
            now = now + last - last
            new_end = now * C.sequence.gather(p2_logits, point)
            point = C.sequence.past_value(point)
            return new_end
        end_logits_ave = C.layers.Recurrence(end_ave, go_backwards=True)(p2_logits)
        return end_logits_ave

    # same windowing as for the start, but anchored at the sequence end
    point = C.sequence.is_last(p1_logits)
    point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus, go_backwards=True))])(point)
    point = C.greater(C.constant(16),point)
    end_logits_ave = end_ave_point(p1_logits, p2_logits, point)

    # one-hot over the sequence at the best-scoring positions
    start_logits = seq_hardmax(start_logits_ave)
    end_logits = seq_hardmax(end_logits_ave)
    '''
    start_logits = seq_hardmax(p1_logits)
    end_logits = seq_hardmax(p2_logits)
    '''

    return C.as_block(
        C.combine([start_logits, end_logits]),
        [(q_processed, query), (mat_context, match_context)],
        'output_layer',
        'output_layer')
def triangular_matrix_seq(mode: int = 1):
    """Build a triangular 0/1 comparison matrix over a sequence.

    ``mode`` selects the comparison applied to the signed index-difference
    matrix: 0 -> equal, 1 -> less_equal, 2 -> less, -1 -> greater_equal,
    -2 -> greater. Any other value returns the raw difference matrix.
    The result is wrapped in a block and detached from gradient flow.
    """
    X = C.placeholder(1)
    ones = C.ones_like(X[0])

    # forward and backward cumulative sums give position indices from each end
    fwd_positions = C.layers.Recurrence(C.plus, return_full_state=True)(ones)
    bwd_positions = C.layers.Recurrence(C.plus, go_backwards=True, return_full_state=True)(ones)
    rows = C.sequence.unpack(fwd_positions, 0, True)
    cols = C.sequence.unpack(bwd_positions, 0, True)

    # outer product minus elementwise product yields a signed difference matrix
    diff = C.times_transpose(rows, cols) - rows * cols

    comparisons = {
        0: C.equal,
        1: C.less_equal,
        2: C.less,
        -1: C.greater_equal,
        -2: C.greater,
    }
    final_mat = comparisons[mode](diff, 0) if mode in comparisons else diff

    result = C.as_block(final_mat, [(X, X)], 'triangular_matrix')
    return C.stop_gradient(result)
def test_unfold():
    """Exercise UnfoldFrom: plain unfolding, then length_increase with an
    early-termination predicate."""
    from cntk.layers import UnfoldFrom

    @Function
    def double_up(s):
        return s * 2

    x = [[[0], [0], [0]],
         [[0], [0], [0], [0], [0]]]

    ####################################################
    # Test 1: simple unfold
    ####################################################
    unfold_op = UnfoldFrom(double_up)

    @Function
    @Signature(Sequence[Tensor[1]])
    def FU(x):
        return unfold_op(Constant(1), x)

    result = FU(x)
    expected = [[[2], [4], [8]],
                [[2], [4], [8], [16], [32]]]
    assert_list_of_arrays_equal(result, expected, err_msg='Error in UnfoldFrom() forward')

    ####################################################
    # Test 2: unfold with length increase and terminating condition
    ####################################################
    unfold_op = UnfoldFrom(double_up, until_predicate=lambda x: greater(x, 63), length_increase=1.6)

    @Function
    @Signature(Sequence[Tensor[1]])
    def FU(x):
        return unfold_op(Constant(1), x)

    result = FU(x)
    expected = [[[2], [4], [8], [16], [32]],        # tests length_increase
                [[2], [4], [8], [16], [32], [64]]]  # tests early cut-off due to until_predicate
    assert_list_of_arrays_equal(result, expected, err_msg='Error in UnfoldFrom(..., until_predicate, length_increase, ...) forward')
def model(self):
    """Assemble the model graph: cosine similarity between the length-averaged
    context and attention outputs, with sigmoid probability, BCE loss and
    accuracy nodes. Returns (cos, loss, acc)."""
    c1_axis = C.Axis.new_unique_dynamic_axis('c1_axis')
    c2_axis = C.Axis.new_unique_dynamic_axis('c2_axis')
    batch_axis = C.Axis.default_batch_axis()

    c1 = C.input_variable(self.word_dim, dynamic_axes=[batch_axis, c1_axis], name='c1')
    c2 = C.input_variable(self.word_dim, dynamic_axes=[batch_axis, c2_axis], name='c2')
    y = C.input_variable(1, dynamic_axes=[batch_axis], name='y')

    c1_processed, c2_processed = self.input_layer(c1, c2).outputs
    att_context = self.attention_layer(c2_processed, c1_processed, 'attention')

    # sequence lengths via a +1 fold, used to turn the sums below into means
    c2_len = C.layers.Fold(plus1)(c2_processed)
    att_len = C.layers.Fold(plus1)(att_context)

    mean_c2 = C.sequence.reduce_sum(c2_processed) / c2_len
    mean_att = C.sequence.reduce_sum(att_context) / att_len
    cos = C.cosine_distance(mean_c2, mean_att)

    prob = C.sigmoid(cos)
    is_context = C.greater(prob, 0.5)
    loss = C.losses.binary_cross_entropy(prob, y)
    acc = C.equal(is_context, y)

    return cos, loss, acc
def signFunc(self, arg):
    """Build a sign-function graph: +1 where the input is positive, -1
    elsewhere. Returns the sign node and the created input variable."""
    # input variable mirroring the argument's shape and dynamic axes
    signIn = C.input(shape=arg.shape, dynamic_axes=arg.dynamic_axes)
    # first stage: 1 where input > 0, else 0
    positive_mask = C.greater(signIn, 0)
    # second stage: map the mask's zeros to -1
    return C.element_select(positive_mask, positive_mask, -1), signIn
def test_unfold():
    """Exercise UnfoldFrom (flat-array expectations): plain unfolding, then
    length_increase with an early-termination predicate."""
    from cntk.layers import UnfoldFrom

    @Function
    def double_up(s):
        return s * 2

    x = [[[0], [0], [0]],
         [[0], [0], [0], [0], [0]]]

    ####################################################
    # Test 1: simple unfold
    ####################################################
    unfold_op = UnfoldFrom(double_up)

    @Function
    @Signature(Sequence[Tensor[1]])
    def FU(x):
        return unfold_op(Constant(1), x)

    result = FU(x)
    expected = [[2., 4., 8.],
                [2., 4., 8., 16., 32.]]
    assert_list_of_arrays_equal(result, expected, err_msg='Error in UnfoldFrom() forward')

    ####################################################
    # Test 2: unfold with length increase and terminating condition
    ####################################################
    unfold_op = UnfoldFrom(double_up, until_predicate=lambda x: greater(x, 63), length_increase=1.6)

    @Function
    @Signature(Sequence[Tensor[1]])
    def FU(x):
        return unfold_op(Constant(1), x)

    result = FU(x)
    expected = [[2, 4, 8, 16, 32],       # tests length_increase
                [2, 4, 8, 16, 32, 64]]   # tests early cut-off due to until_predicate
    assert_list_of_arrays_equal(result, expected, err_msg='Error in UnfoldFrom(..., until_predicate, length_increase, ...) forward')
def multiFunc(self, arg1):
    """Multi-bit binarization of *arg1* using one global mean per bit level.

    self.bit_map gives the per-weight bit count; each loop iteration adds one
    binarization level. Returns the approximation node and the input variable.
    """
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()

    # residual still to be binarized; starts as the raw input
    residual = multiIn
    # running approximation, initialised to zeros of the input's shape
    approx = C.element_times(multiIn, 0)

    for level in range(max_bits):
        # positions that receive at least level+1 bits
        hot_vals = C.greater(bit_map, level)
        valid_vals = C.element_select(hot_vals, residual, 0)
        # single scalar mean magnitude over all active values
        mean = C.element_divide(C.reduce_sum(C.abs(valid_vals)),
                                C.reduce_sum(hot_vals))
        # sign of the residual: +1/-1 on active positions, 0 elsewhere
        bits = C.greater(residual, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        # accumulate this level's contribution and update the residual
        approx = C.plus(approx, C.element_times(mean, bits))
        residual = C.plus(C.element_times(C.element_times(-1, bits), mean),
                          residual)

    return approx, multiIn
def multiFunc(self, arg1):
    """Approximate *arg1* with a multi-bit binary representation.

    Each entry of self.bit_map gives the number of bits used for the
    corresponding weight; one binarization level is added per iteration using
    a single global mean magnitude.

    Returns:
        (approx, multiIn): approximation node and created input variable.
    """
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()
    # NOTE: the original also computed `shape` and `reformed` locals that were
    # never used; that dead code has been removed.
    # carry_over holds the residual still to be binarized; starts as the input
    carry_over = multiIn
    approx = C.element_times(multiIn, 0)
    for i in range(max_bits):
        # positions binarized with at least i+1 bits
        hot_vals = C.greater(bit_map, i)
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        # global mean magnitude over the active positions
        mean = C.element_divide(C.reduce_sum(C.abs(valid_vals)), C.reduce_sum(hot_vals))
        # sign of the residual: +1/-1 on active positions, 0 elsewhere
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        approx = C.plus(approx, C.element_times(mean, bits))
        # subtract the contribution just added from the residual
        carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)
    return approx, multiIn
def multiFunc(self, arg1):
    """Build a multi-bit binary approximation of *arg1* with per-kernel means.

    self.bit_map gives, per weight, how many bits binarize it; one level is
    emitted per loop iteration. Returns (approx, multiIn).
    """
    # load or create the inputs we need
    multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
    bit_map = C.constant(self.bit_map)
    max_bits = self.bit_map.max()
    # NOTE: unused `shape` and `reformed` locals from the original were dead
    # code and have been removed.
    # carry over represents the remaining value that needs to be binarized.
    # For a single bit, this is just the input. For more bits, it is the
    # difference between the previous bits' approximation and the true value.
    carry_over = multiIn
    approx = C.element_times(multiIn, 0)
    # iterate through the maximum number of bits specified by the bit maps,
    # basically compute each level of binarization
    for i in range(max_bits):
        # determine which values of the input should be binarized to i bits or more
        hot_vals = C.greater(bit_map, i)
        # select only the values which we need to binarize
        valid_vals = C.element_select(hot_vals, carry_over, 0)
        # compute mean on a per kernel basis, reshaping is done to allow for
        # sum reduction along only axis 0 (the kernels)
        mean = C.element_divide(
            C.reduce_sum(C.reshape(C.abs(valid_vals), (valid_vals.shape[0], -1)), axis=1),
            C.reduce_sum(C.reshape(hot_vals, (hot_vals.shape[0], -1)), axis=1))
        # reshape the mean to match the dimensionality of the input
        mean = C.reshape(mean, (mean.shape[0], mean.shape[1], 1, 1))
        # binarize the carry over: +1/-1 on active positions, 0 elsewhere
        bits = C.greater(carry_over, 0)
        bits = C.element_select(bits, bits, -1)
        bits = C.element_select(hot_vals, bits, 0)
        # add in the equivalent binary representation to the approximation
        approx = C.plus(approx, C.element_times(mean, bits))
        # compute the new carry over (subtract what was just approximated)
        carry_over = C.plus(
            C.element_times(C.element_times(-1, bits), mean), carry_over)

    return approx, multiIn
def greater(left, right, name=''):
    '''
    Elementwise 'greater' comparison of two tensors. Result is 1 if left > right else 0.

    Example:
        >>> C.eval(C.greater([41., 42., 43.], [42., 42., 42.]))
        [array([[0., 0., 1.]])]

        >>> C.eval(C.greater([-1,0,1], [0]))
        [array([[1., 0., 1.]])]

    Args:
        left: left side tensor
        right: right side tensor
        name (str): the name of the node in the network
    Returns:
        :class:`cntk.Function`
    '''
    # local import so this wrapper's name does not shadow the underlying op
    from cntk import greater
    # coerce each side to a tensor, inferring its dtype from the other side;
    # note the second call sees the already-sanitized `left`
    left = sanitize_input(left, get_data_type(right))
    right = sanitize_input(right, get_data_type(left))
    # NOTE(review): `.output()` is invoked as a method here; in later CNTK
    # releases `output` is a property -- confirm against the targeted version
    return greater(left, right, name).output()
def sample(self, n=1):
    """Draw *n* samples from the distribution described by self.accum_prob
    (cumulative probabilities), returning the sampled indices."""
    draws = C.random.uniform((n, 1))
    # index where the cumulative probability exceeds the draw
    # (argmax over the 0/1 mask produced by the comparison)
    chosen = C.argmax(C.greater(self.accum_prob - draws, 0), axis=1)
    return C.squeeze(chosen)
def test_Greater(tmpdir):
    """Export an input-free elementwise greater comparison and verify it."""
    lhs = [41., 42., 43.]
    rhs = [42., 42., 42.]
    model = C.greater(lhs, rhs)
    verify_no_input(model, tmpdir, 'Greater_0')
def test_Greater(tmpdir, dtype):
    """Export an input-free elementwise greater comparison under the given
    default dtype and verify it."""
    with C.default_options(dtype=dtype):
        lhs = [41., 42., 43.]
        rhs = [42., 42., 42.]
        model = C.greater(lhs, rhs)
        verify_no_input(model, tmpdir, 'Greater_0')
def test_Greater(tmpdir, dtype):
    """Export an input-free elementwise greater comparison under the given
    default dtype and verify it."""
    with C.default_options(dtype=dtype):
        comparison = C.greater([41., 42., 43.], [42., 42., 42.])
        verify_no_input(comparison, tmpdir, 'Greater_0')
def validate_model(test_data, model, polymath):
    # Evaluate the trained span-prediction model over the full validation set,
    # printing averaged loss, F1, exact match, precision/recall and match stats.
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = model.outputs[2]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root, test_data, polymath, randomize=False, repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')

    # gradient-enabled inputs: the best-span search feeds its gradient back
    # through these to obtain the predicted one-hot begin/end positions
    begin_prediction = C.sequence.input_variable(
        1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(
        1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    # running sum of (begin - shifted end) marks positions inside each span
    predicted_span = C.layers.Recurrence(
        C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label - C.sequence.past_value(end_label))
    common_span = C.element_min(predicted_span, true_span)
    begin_match = C.sequence.reduce_sum(
        C.element_min(begin_prediction, begin_label))
    end_match = C.sequence.reduce_sum(C.element_min(end_prediction, end_label))

    # span-overlap statistics (token counts)
    predicted_len = C.sequence.reduce_sum(predicted_span)
    true_len = C.sequence.reduce_sum(true_span)
    common_len = C.sequence.reduce_sum(common_span)
    f1 = 2 * common_len / (predicted_len + true_len)
    exact_match = C.element_min(begin_match, end_match)
    precision = common_len / predicted_len
    recall = common_len / true_len
    overlap = C.greater(common_len, 0)
    # collapse each statistic to a scalar so they can be spliced together
    s = lambda x: C.reduce_sum(x, axis=C.Axis.all_axes())
    stats = C.splice(s(f1), s(exact_match), s(precision), s(recall), s(overlap),
                     s(begin_match), s(end_match))

    # Evaluation parameters
    minibatch_size = 2048
    num_sequences = 0
    stat_sum = 0
    loss_sum = 0

    with tqdm(ncols=32) as progress_bar:
        while True:
            data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
            # stop when the non-repeating source is exhausted
            if not data or not (begin_label in data
                                ) or data[begin_label].num_sequences == 0:
                break
            out = model.eval(data,
                             outputs=[begin_logits, end_logits, loss],
                             as_numpy=False)
            testloss = out[loss]
            # gradient of the best-span score w.r.t. the logits yields the
            # predicted one-hot begin/end markers
            g = best_span_score.grad(
                {
                    begin_prediction: out[begin_logits],
                    end_prediction: out[end_logits]
                },
                wrt=[begin_prediction, end_prediction],
                as_numpy=False)
            other_input_map = {
                begin_prediction: g[begin_prediction],
                end_prediction: g[end_prediction],
                begin_label: data[begin_label],
                end_label: data[end_label]
            }
            stat_sum += stats.eval((other_input_map))
            loss_sum += np.sum(testloss.asarray())
            num_sequences += data[begin_label].num_sequences
            progress_bar.update(data[begin_label].num_sequences)

    stat_avg = stat_sum / num_sequences
    loss_avg = loss_sum / num_sequences

    print(
        "\nValidated {} sequences, loss {:.4f}, F1 {:.4f}, EM {:.4f}, precision {:4f}, recall {:4f} hasOverlap {:4f}, start_match {:4f}, end_match {:4f}"
        .format(num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2],
                stat_avg[3], stat_avg[4], stat_avg[5], stat_avg[6]))

    return loss_avg
def validate_model(test_data, model, polymath):
    # Evaluate the trained span-prediction model over the full validation set,
    # printing averaged loss, F1, exact match, precision/recall and match stats.
    begin_logits = model.outputs[0]
    end_logits = model.outputs[1]
    loss = model.outputs[2]
    root = C.as_composite(loss.owner)
    mb_source, input_map = create_mb_and_map(root, test_data, polymath, randomize=False, repeat=False)
    begin_label = argument_by_name(root, 'ab')
    end_label = argument_by_name(root, 'ae')

    # gradient-enabled inputs: the best-span search feeds its gradient back
    # through these to obtain the predicted one-hot begin/end positions
    begin_prediction = C.sequence.input_variable(1, sequence_axis=begin_label.dynamic_axes[1], needs_gradient=True)
    end_prediction = C.sequence.input_variable(1, sequence_axis=end_label.dynamic_axes[1], needs_gradient=True)

    best_span_score = symbolic_best_span(begin_prediction, end_prediction)
    # running sum of (begin - shifted end) marks positions inside each span
    predicted_span = C.layers.Recurrence(C.plus)(begin_prediction - C.sequence.past_value(end_prediction))
    true_span = C.layers.Recurrence(C.plus)(begin_label - C.sequence.past_value(end_label))
    common_span = C.element_min(predicted_span, true_span)
    begin_match = C.sequence.reduce_sum(C.element_min(begin_prediction, begin_label))
    end_match = C.sequence.reduce_sum(C.element_min(end_prediction, end_label))

    # span-overlap statistics (token counts)
    predicted_len = C.sequence.reduce_sum(predicted_span)
    true_len = C.sequence.reduce_sum(true_span)
    common_len = C.sequence.reduce_sum(common_span)
    f1 = 2*common_len/(predicted_len+true_len)
    exact_match = C.element_min(begin_match, end_match)
    precision = common_len/predicted_len
    recall = common_len/true_len
    overlap = C.greater(common_len, 0)
    # collapse each statistic to a scalar so they can be spliced together
    s = lambda x: C.reduce_sum(x, axis=C.Axis.all_axes())
    stats = C.splice(s(f1), s(exact_match), s(precision), s(recall), s(overlap), s(begin_match), s(end_match))

    # Evaluation parameters
    minibatch_size = 20000
    num_sequences = 0
    stat_sum = 0
    loss_sum = 0

    while True:
        data = mb_source.next_minibatch(minibatch_size, input_map=input_map)
        # stop when the non-repeating source is exhausted
        if not data or not (begin_label in data) or data[begin_label].num_sequences == 0:
            break
        out = model.eval(data, outputs=[begin_logits,end_logits,loss], as_numpy=False)
        testloss = out[loss]
        # gradient of the best-span score w.r.t. the logits yields the
        # predicted one-hot begin/end markers
        g = best_span_score.grad({begin_prediction:out[begin_logits],
                                  end_prediction:out[end_logits]},
                                 wrt=[begin_prediction,end_prediction],
                                 as_numpy=False)
        other_input_map = {begin_prediction: g[begin_prediction],
                           end_prediction: g[end_prediction],
                           begin_label: data[begin_label],
                           end_label: data[end_label]}
        stat_sum += stats.eval((other_input_map))
        loss_sum += np.sum(testloss.asarray())
        num_sequences += data[begin_label].num_sequences

    stat_avg = stat_sum / num_sequences
    loss_avg = loss_sum / num_sequences

    print("Validated {} sequences, loss {:.4f}, F1 {:.4f}, EM {:.4f}, precision {:4f}, recall {:4f} hasOverlap {:4f}, start_match {:4f}, end_match {:4f}".format(
        num_sequences, loss_avg, stat_avg[0], stat_avg[1], stat_avg[2],
        stat_avg[3], stat_avg[4], stat_avg[5], stat_avg[6]))

    return loss_avg
def word_level_drop(self, doc):
    """Word-level dropout: zero out roughly 8% of the positions in *doc*."""
    # doc [#, c][d]
    # per-position uniform noise shaped like the sequence (fixed seed)
    step_markers = C.sequence.is_first(doc)
    noise = C.random.uniform_like(step_markers, seed=98052)
    # keep a position when its noise exceeds the drop threshold
    keep_mask = C.element_select(C.greater(noise, 0.08), 1.0, 0)
    return doc * keep_mask
import cntk

# Demonstrate CNTK's elementwise comparison ops on two small vectors,
# printing each operator's name followed by its evaluated result.
A = [1, 3, 4]
B = [4, 3, 2]

comparisons = [
    ("less(A,B):", cntk.less),
    ("equal(A,B):", cntk.equal),
    ("greater(A,B)", cntk.greater),
    ("greater_equal(A,B):", cntk.greater_equal),
    ("not_equal(A,B):", cntk.not_equal),
    ("less_equal(A,B):", cntk.less_equal),
]

for label, op in comparisons:
    print(label)
    print("{}\n".format(op(A, B).eval()))