import tensorflow as tf

import lfe  # project-local helper module (import path assumed); provides identity_like()


def l2_orthogonal_regularizer(logits_to_normalize, l2_alpha_loss_factor=10, name=None):
    '''Adds an L2 orthogonality penalty linearly to the softmax cost function.

    Motivation for this loss function comes from:
    https://www.reddit.com/r/MachineLearning/comments/3uk2q5/151106464_unitary_evolution_recurrent_neural/
    Thanks to spurious_recollectio on reddit for discussing this suggestion with me.

    This differs from the unitary approach because it uses an orthogonal-matrix
    approximation -- it will suffer on sequences longer than ~500 timesteps and
    costs more compute, O(n^3).

    The cost equation is:
        loss += alpha * T.sum((T.dot(W, W.T) - (1.05) ** 2 * T.identity_like(W)) ** 2)

    Returns:
        final_l2_loss: a scalar, the penalty averaged across the batch.'''
    with tf.op_scope([logits_to_normalize], name, "rnn_l2_loss"):  # required for tf name scoping
        # TODO: we still need a robust way to get the weights out of the rnn
        # right here; see the hedged sketch after this function for one option.
        Weights_for_l2_loss = tf.get_variable("linear")
        # Note: this computes W^T.W while the equation above uses W.W^T --
        # which one is right depends on the layout of the weight matrix.
        matrix_dot_product = tf.matmul(Weights_for_l2_loss, Weights_for_l2_loss,
                                       transpose_a=True)
        identity_matrix = lfe.identity_like(Weights_for_l2_loss)
        # (1.05) ** 2 per the equation above; the earlier 2 * 1.05 was a typo.
        matrix_minus_identity = matrix_dot_product - (1.05 ** 2) * identity_matrix
        square_the_loss = tf.square(matrix_minus_identity)
        # Assumes dim 0 of the logits is the batch dimension.
        batch_size = tf.cast(tf.shape(logits_to_normalize)[0], tf.float32)
        final_l2_loss = l2_alpha_loss_factor * (
            tf.reduce_sum(square_the_loss) / batch_size)
    return final_l2_loss
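# A hedged sketch of one way to resolve the TODO above. In the TF 0.x API this
# file targets, the weight matrix created by rnn_cell's linear() helper lives
# under a variable scope such as "RNN/BasicLSTMCell/Linear" with the variable
# name "Matrix". Both names below are assumptions -- the exact path depends on
# how the graph was built -- so adjust the scope to match your model.
def get_rnn_linear_weights(scope_name="RNN/BasicLSTMCell/Linear"):
    '''Reopen the cell's variable scope with reuse=True and fetch the existing
    weight matrix by name (no shape argument is needed when reusing).'''
    with tf.variable_scope(scope_name, reuse=True):
        return tf.get_variable("Matrix")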
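# Minimal usage sketch for the regularizer above: the penalty is added
# linearly to the softmax cost, per the docstring. `cost` and `logits` are
# hypothetical names for the cross-entropy tensor and the model outputs:
#
#     cost = cost + l2_orthogonal_regularizer(logits, l2_alpha_loss_factor=10)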
def l1_orthogonal_regularizer(logits_to_normalize, l1_alpha_loss_factor=10, name=None):
    '''Adds an L1 orthogonality penalty linearly to the softmax cost function.

    Motivation for this loss function comes from: https://redd.it/3wx4sr
    Thanks to spurious_recollectio and harponen on reddit for discussing this
    suggestion with me.

    This differs from the unitary approach because it uses an orthogonal-matrix
    approximation -- it will suffer on sequences longer than ~500 timesteps and
    costs more compute, O(n^3).

    The L1 equation is:
        alpha * T.abs(T.dot(W, W.T) - (1.05) ** 2 * T.identity_like(W))

    Returns:
        final_l1_loss: a scalar, the penalty averaged across the batch.'''
    with tf.op_scope([logits_to_normalize], name, "rnn_l1_loss"):  # required for tf name scoping
        # TODO: same open question as in the L2 version -- fetching the
        # recurrent weights robustly.
        Weights_for_l1_loss = tf.get_variable("linear")
        # Note: this computes W^T.W while the equation above uses W.W^T --
        # which one is right depends on the layout of the weight matrix.
        matrix_dot_product = tf.matmul(Weights_for_l1_loss, Weights_for_l1_loss,
                                       transpose_a=True)
        identity_matrix = lfe.identity_like(Weights_for_l1_loss)
        # (1.05) ** 2 per the equation above; the earlier 2 * 1.05 was a typo.
        matrix_minus_identity = matrix_dot_product - (1.05 ** 2) * identity_matrix
        absolute_cost = tf.abs(matrix_minus_identity)
        # Assumes dim 0 of the logits is the batch dimension.
        batch_size = tf.cast(tf.shape(logits_to_normalize)[0], tf.float32)
        # reduce_sum makes this a scalar, as the docstring promises (without
        # it this would return a full matrix).
        final_l1_loss = l1_alpha_loss_factor * (
            tf.reduce_sum(absolute_cost) / batch_size)
    return final_l1_loss
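# A quick, self-contained check of the penalty's fixed point (NumPy only;
# names here are illustrative, not part of the module API): for W = 1.05 * Q
# with Q orthogonal, W^T.W equals (1.05)^2 * I, so the quantity both
# regularizers penalize is exactly zero. Run it directly to verify.
def _sanity_check_orthogonal_penalty():
    import numpy as np
    rng = np.random.RandomState(0)
    # Build a random orthogonal Q via QR decomposition, then scale to target.
    q, _ = np.linalg.qr(rng.randn(4, 4))
    w = 1.05 * q
    penalty = np.sum((w.T.dot(w) - (1.05 ** 2) * np.eye(4)) ** 2)
    assert penalty < 1e-10  # vanishes at the orthogonal target, as expected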