def triangular_matrix_seq(mode: int = 1):
    X = C.placeholder(1)
    ones = C.ones_like(X[0])
    perm_1 = C.layers.Recurrence(C.plus, return_full_state=True)(ones)
    perm_2 = C.layers.Recurrence(C.plus, go_backwards=True, return_full_state=True)(ones)

    arr_1 = C.sequence.unpack(perm_1, 0, True)  # running count 1..n
    arr_2 = C.sequence.unpack(perm_2, 0, True)  # running count n..1

    mat = C.times_transpose(arr_1, arr_2)
    mat_c = arr_1 * arr_2

    diagonal_mat = mat - mat_c  # entry (i, j) has the sign of i - j

    final_mat = diagonal_mat
    if mode == 0:
        final_mat = C.equal(final_mat, 0)
    elif mode == 1:
        final_mat = C.less_equal(final_mat, 0)
    elif mode == 2:
        final_mat = C.less(final_mat, 0)
    elif mode == -1:
        final_mat = C.greater_equal(final_mat, 0)
    elif mode == -2:
        final_mat = C.greater(final_mat, 0)

    result = C.as_block(final_mat, [(X, X)], 'triangular_matrix')
    return C.stop_gradient(result)
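For intuition (my own note, not part of the original source): with forward running sums a[i] = i + 1 and backward running sums b[j] = n - j, entry (i, j) of diagonal_mat works out to (i + 1) * (i - j), so its sign tells which side of the diagonal the entry lies on. A quick NumPy sketch of the same arithmetic for n = 4:

import numpy as np

n = 4
a = np.arange(1, n + 1)                 # forward cumulative sum of ones: 1..n
b = np.arange(n, 0, -1)                 # backward cumulative sum: n..1
d = np.outer(a, b) - (a * b)[:, None]   # equals (i + 1) * (i - j)
print((d <= 0).astype(int))             # mode=1: upper-triangular mask incl. diagonal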
def cross_entropy_with_full_softmax(
        output,         # Node providing the output of the LSTM layers
        target_vector,  # Node providing the expected labels
        sv_dim,
        vocab_dim
        ):
    sv_vector = output.outputs[3]
    z = output.outputs[0]
    zT = C.times_transpose(z, target_vector)

    # Cross-entropy loss with softmax function.
    ce = -C.log(zT)
    # The error: 1 whenever the true class does not have the maximal score.
    zMax = C.reduce_max(z)
    error = C.less(zT, zMax)
    ce = C.sequence.reduce_sum(ce)

    # Discourage the network from turning more than one gate off in a single
    # time step: the penalty 0.0001 * 100**sumc grows steeply as adjacent
    # slices of sv_vector diverge.
    sumc = C.abs(C.sequence.slice(sv_vector, 1, 0) - C.sequence.slice(sv_vector, 0, -1))
    sumc = C.sequence.reduce_sum(0.0001 * C.pow(100.0, sumc))
    # ce += sumc

    # Penalise generated utterances that fail to render all the required slots.
    sumc += C.abs(C.sequence.last(sv_vector))
    sumc += C.abs(C.sequence.first(sv_vector) - output.outputs[4])
    sumc = C.reduce_sum(sumc)
    ce = C.reduce_sum(ce)
    ce += sumc

    return ce, error
def cross_entropy_with_sampled_softmax(
        hidden_vector,           # Node providing the output of the recurrent layers
        target_vector,           # Node providing the expected labels (as sparse vectors)
        vocab_dim,               # Vocabulary size
        hidden_dim,              # Dimension of the hidden vector
        num_samples,             # Number of samples to use for sampled softmax
        sampling_weights,        # Node providing weights to be used for the weighted sampling
        allow_duplicates=False   # Controls whether to sample with replacement (True) or without
        ):
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim), init=C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(
        sampling_weights, num_samples, allow_duplicates)  # sparse matrix [num_samples * vocab_size]
    if use_sparse:  # module-level flag
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the matrix 'I' below might be quite large.
        # If we want a dense representation for all data, we have to convert the sample selector.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(
        sampling_weights, num_samples, allow_duplicates)  # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    wS = C.times(sample_selector, weights, name='wS')  # [num_samples * hidden_dim]
    print("wS: " + str(wS.shape))
    zS = (C.times_transpose(wS, hidden_vector, name='zS1')
          + C.times(sample_selector, bias, name='zS2')
          - C.times_transpose(sample_selector, log_prior, name='zS3'))  # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim.
    wT = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    zT = (C.times_transpose(wT, hidden_vector, name='zT1')
          + C.times(target_vector, bias, name='zT2')
          - C.times_transpose(target_vector, log_prior, name='zT3'))  # [1]

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides
    # with the true label, so it might happen that the true class is counted
    # twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax.
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=vocab_dim)

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)

    return (z, cross_entropy_on_samples, error_on_samples)
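The function above reads use_sparse as a module-level flag, as in CNTK's word-RNN sampled-softmax example. A rough wiring sketch with illustrative sizes; the names and values below are my own, not from the original:

import numpy as np
import cntk as C

use_sparse = True  # module-level flag consumed by cross_entropy_with_sampled_softmax
vocab_dim, hidden_dim, num_samples = 100, 32, 10

# Uniform sampling weights as a 1 x vocab_dim row; frequency-based weights also work.
sampling_weights = C.Constant(np.ones((1, vocab_dim), dtype=np.float32))

hidden = C.sequence.input_variable(hidden_dim)
target = C.sequence.input_variable(vocab_dim, is_sparse=True)
z, ce, err = cross_entropy_with_sampled_softmax(
    hidden, target, vocab_dim, hidden_dim, num_samples, sampling_weights)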
def test_Less(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data0 = np.asarray([41., 42., 43.], dtype=dtype)
        data1 = np.asarray([42., 42., 42.], dtype=dtype)
        model = C.less(data0, data1)
        verify_no_input(model, tmpdir, 'Less_0')
def cross_entropy_with_full_softmax(
        hidden_vector,  # Node providing the output of the recurrent layers
        target_vector,  # Node providing the expected labels (as sparse vectors)
        vocab_dim,      # Vocabulary size
        hidden_dim      # Dimension of the hidden vector
        ):
    bias = C.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.Parameter(shape=(vocab_dim, hidden_dim), init=C.initializer.glorot_uniform())

    z = C.reshape(C.times_transpose(weights, hidden_vector) + bias, (1, vocab_dim))
    zT = C.times_transpose(z, target_vector)
    ce = C.reduce_log_sum_exp(z) - zT
    zMax = C.reduce_max(z)
    error_on_samples = C.less(zT, zMax)

    return (z, ce, error_on_samples)
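A minimal wiring sketch for the full-softmax variant (my own illustration, sizes are arbitrary):

import cntk as C

vocab_dim, hidden_dim = 100, 32
hidden = C.sequence.input_variable(hidden_dim)
target = C.sequence.input_variable(vocab_dim, is_sparse=True)
z, ce, errs = cross_entropy_with_full_softmax(hidden, target, vocab_dim, hidden_dim)
# ce would typically be handed to a Trainer as the loss, errs as the evaluation criterion.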
def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """
    From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py

    ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets))

    SmoothL1(x) = 0.5 * (sigma * x)^2,    if |x| < 1 / sigma^2
                  |x| - 0.5 / sigma^2,    otherwise
    """
    sigma2 = sigma * sigma

    inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets)))

    smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2)
    smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2)
    smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2)
    smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign),
                              C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign)))

    return C.element_times(bbox_outside_weights, smooth_l1_result)
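A quick numeric check (my own, with sigma = 1 so the branch point is |x| = 1): |x| = 0.5 falls in the quadratic branch and |x| = 2.0 in the linear branch.

import numpy as np
import cntk as C

pred   = np.asarray([0.5, 2.0], dtype=np.float32)
target = np.asarray([0.0, 0.0], dtype=np.float32)
ones   = np.ones(2, dtype=np.float32)
loss = SmoothL1Loss(1.0, pred, target, ones, ones)
print(loss.eval())  # -> [0.125 1.5], i.e. 0.5 * 0.5**2 and 2.0 - 0.5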
def less(left, right, name=''):
    '''
    Elementwise 'less' comparison of two tensors. Result is 1 if left < right else 0.

    Example:
        >>> C.eval(C.less([41., 42., 43.], [42., 42., 42.]))
        [array([[1., 0., 0.]])]

        >>> C.eval(C.less([-1, 0, 1], [0]))
        [array([[1., 0., 0.]])]

    Args:
        left: left side tensor
        right: right side tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import less
    left = sanitize_input(left, get_data_type(right))
    right = sanitize_input(right, get_data_type(left))
    return less(left, right, name).output()
def cross_entropy_with_sampled_softmax(
        hidden_vector,
        label_vector,
        vocab_dim,
        hidden_dim,
        num_samples,
        sampling_weights,
        allow_duplicates=False
        ):
    bias = C.layers.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.layers.Parameter(shape=(vocab_dim, hidden_dim), init=C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(sampling_weights, num_samples, allow_duplicates)
    sample_selector = sample_selector_sparse

    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates)
    log_prior = C.log(inclusion_probs)

    wS = C.times(sample_selector, weights, name='wS')
    zS = (C.times_transpose(wS, hidden_vector, name='zS1')
          + C.times(sample_selector, bias, name='zS2')
          - C.times_transpose(sample_selector, log_prior, name='zS3'))

    # Getting the weight vector for the true label. Dimension hidden_dim.
    wT = C.times(label_vector, weights, name='wT')
    zT = (C.times_transpose(wT, hidden_vector, name='zT1')
          + C.times(label_vector, bias, name='zT2')
          - C.times_transpose(label_vector, log_prior, name='zT3'))

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax.
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=vocab_dim)

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)

    return (z, cross_entropy_on_samples, error_on_samples)
def test_Less(tmpdir):
    data0 = np.asarray([41., 42., 43.], dtype=np.float32)
    data1 = np.asarray([42., 42., 42.], dtype=np.float32)
    model = C.less(data0, data1)
    verify_no_input(model, tmpdir, 'Less_0')
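Independent of whatever verify_no_input checks (it is a helper defined elsewhere in the test module), the comparison itself can be sanity-checked directly; only 41 < 42 holds:

import numpy as np
import cntk as C

result = C.less(np.asarray([41., 42., 43.], dtype=np.float32),
                np.asarray([42., 42., 42.], dtype=np.float32)).eval()
assert np.array_equal(result, np.asarray([1., 0., 0.], dtype=np.float32))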
def _leaky_relu_inv(x):
    # Inverse of a leaky ReLU with negative slope 0.001: non-negative values
    # pass through unchanged, negative values are scaled back up by 1 / 0.001.
    alpha = 1 / 0.001
    _upper = C.greater_equal(x, 0) * x
    _lower = C.less(x, 0) * alpha * x
    return _lower + _upper
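A quick check (my own): values at or above zero pass through, negative values are scaled by 1000.

import numpy as np

x = np.asarray([-0.002, 0.0, 3.0], dtype=np.float32)
print(_leaky_relu_inv(x).eval())  # -> [-2.  0.  3.]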
import cntk

A = [1, 3, 4]
B = [4, 3, 2]

print("less(A,B):")
less = cntk.less(A, B).eval()
print("{}\n".format(less))

print("equal(A,B):")
equal = cntk.equal(A, B).eval()
print("{}\n".format(equal))

print("greater(A,B):")
greater = cntk.greater(A, B).eval()
print("{}\n".format(greater))

print("greater_equal(A,B):")
greater_equal = cntk.greater_equal(A, B).eval()
print("{}\n".format(greater_equal))

print("not_equal(A,B):")
not_equal = cntk.not_equal(A, B).eval()
print("{}\n".format(not_equal))

print("less_equal(A,B):")
less_equal = cntk.less_equal(A, B).eval()
print("{}\n".format(less_equal))
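For A = [1, 3, 4] and B = [4, 3, 2] the comparisons evaluate elementwise to:

less(A,B):          [1. 0. 0.]
equal(A,B):         [0. 1. 0.]
greater(A,B):       [0. 0. 1.]
greater_equal(A,B): [0. 1. 1.]
not_equal(A,B):     [1. 0. 1.]
less_equal(A,B):    [1. 1. 0.]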