Example 1
import tensorflow as tf


def l2_orthogonal_regularizer(logits_to_normalize, l2_alpha_loss_factor=10, name=None):

  '''Adds an L2 orthogonality penalty that is summed linearly with the softmax cost function.

  Motivation for this loss function comes from:
  https://www.reddit.com/r/MachineLearning/comments/3uk2q5/151106464_unitary_evolution_recurrent_neural/
  Thanks to spurious_recollectio on reddit for suggesting this approach.

  Unlike a unitary RNN, this is only an orthogonal-matrix approximation -- it will
  suffer on sequences longer than roughly 500 timesteps and costs O(n^3) computation.

  The penalty (in Theano notation) is:
    loss += alpha * T.sum((T.dot(W, W.T) - (1.05) ** 2 * T.identity_like(W)) ** 2)

  Returns:
    final_l2_loss: one scalar value representing the loss averaged across the batch.'''

  with tf.op_scope([logits_to_normalize], name, "rnn_l2_loss"):  # op_scope expects a list of tensors

    # Fetch the RNN's "linear" weight matrix; this assumes we are inside a variable
    # scope where that variable already exists and reuse is enabled (see the sketch
    # after this function for one way to collect these weights explicitly).
    Weights_for_l2_loss = tf.get_variable("linear")

    # W^T W; the docstring equation uses W W^T, but either form penalizes
    # deviation from orthogonality.
    matrix_dot_product = tf.matmul(Weights_for_l2_loss, Weights_for_l2_loss, transpose_a=True)

    # Identity matrix sized to match W^T W, i.e. the column dimension (dim 1) of W.
    identity_matrix = tf.diag(tf.ones([Weights_for_l2_loss.get_shape()[1].value]))

    # Deviation from the (slightly relaxed) identity; 1.05 ** 2 follows the equation above.
    matrix_minus_identity = matrix_dot_product - (1.05 ** 2) * identity_matrix

    square_the_loss = tf.square(matrix_minus_identity)

    # batch_size is assumed to be defined at module scope.
    final_l2_loss = l2_alpha_loss_factor * (tf.reduce_sum(square_the_loss) / batch_size)
  return final_l2_loss
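
The open question in the comments above -- how to actually get hold of the RNN weight matrices -- can be handled by filtering tf.trainable_variables() by name. The sketch below is an illustration under stated assumptions rather than part of the original example: the "linear" name fragment mirrors the tf.get_variable("linear") call above, and the get_rnn_linear_weights / total_orthogonal_penalty helpers and their alpha and batch_size defaults are placeholders.

# A minimal sketch, assuming the RNN kernels you want to regularize contain
# "linear" in their variable names; adapt the name fragment to whatever your
# graph actually uses.
def get_rnn_linear_weights(name_fragment="linear"):
  '''Return every trainable 2-D weight matrix whose name contains name_fragment.'''
  return [v for v in tf.trainable_variables()
          if name_fragment in v.name and v.get_shape().ndims == 2]


def total_orthogonal_penalty(alpha=10.0, batch_size=32):
  '''Sum the squared deviation of each W^T W from the relaxed identity, as above.'''
  penalty = tf.constant(0.0)
  for W in get_rnn_linear_weights():
    gram = tf.matmul(W, W, transpose_a=True)              # W^T W
    eye = tf.diag(tf.ones([W.get_shape()[1].value]))      # matching identity
    penalty += tf.reduce_sum(tf.square(gram - (1.05 ** 2) * eye))
  return alpha * penalty / batch_size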
Example 3
def l1_orthogonal_regularizer(logits_to_normalize, l1_alpha_loss_factor=10, name=None):

  '''Adds an L1 orthogonality penalty that is summed linearly with the softmax cost function.

  Motivation for this loss function comes from: https://redd.it/3wx4sr
  Thanks to spurious_recollectio and harponen on reddit for suggesting this approach.

  Unlike a unitary RNN, this is only an orthogonal-matrix approximation -- it will
  suffer on sequences longer than roughly 500 timesteps and costs O(n^3) computation.

  The penalty (in Theano notation) is:
    alpha * T.abs(T.dot(W, W.T) - (1.05) ** 2 * T.identity_like(W))

  Returns:
    final_l1_loss: one scalar value representing the loss averaged across the batch.'''

  with tf.op_scope([logits_to_normalize], name, "rnn_l1_loss"):  # op_scope expects a list of tensors

    # Fetch the RNN's "linear" weight matrix; as above, this assumes the variable
    # already exists in a reusing variable scope.
    Weights_for_l1_loss = tf.get_variable("linear")

    # W^T W.
    matrix_dot_product = tf.matmul(Weights_for_l1_loss, Weights_for_l1_loss, transpose_a=True)

    # Identity matrix sized to match W^T W, i.e. the column dimension (dim 1) of W.
    identity_matrix = tf.diag(tf.ones([Weights_for_l1_loss.get_shape()[1].value]))

    # Deviation from the (slightly relaxed) identity; 1.05 ** 2 follows the equation above.
    matrix_minus_identity = matrix_dot_product - (1.05 ** 2) * identity_matrix

    absolute_cost = tf.abs(matrix_minus_identity)

    # Reduce to a single scalar so the penalty can be added to the cost;
    # batch_size is assumed to be defined at module scope.
    final_l1_loss = l1_alpha_loss_factor * (tf.reduce_sum(absolute_cost) / batch_size)

  return final_l1_loss
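
For completeness, here is a hedged usage sketch (not taken from the original examples) showing how either regularizer is meant to be combined with the softmax cost: the penalty is simply added to the cross-entropy before building the train op. The scope name "rnn", the placeholder shapes, and the stand-in weight variable are illustrative assumptions.

# Illustrative values only; batch_size must exist at module scope for the regularizers.
batch_size, num_classes = 32, 10

# Stand-in for the RNN's weight matrix; in real use the RNN cell creates this variable.
with tf.variable_scope("rnn"):
  tf.get_variable("linear", shape=[num_classes, num_classes])

logits = tf.placeholder(tf.float32, [batch_size, num_classes])
labels = tf.placeholder(tf.int64, [batch_size])

cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

# Re-enter the scope that owns "linear" so tf.get_variable("linear") inside the
# regularizer resolves to the existing matrix instead of trying to create a new one.
with tf.variable_scope("rnn", reuse=True):
  reg_loss = l1_orthogonal_regularizer(logits, l1_alpha_loss_factor=10)

total_loss = cross_entropy + reg_loss  # the penalty is added linearly to the softmax cost
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(total_loss)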