Example 1
import tensorflow as tf

# `l2norm` is assumed to be a project helper that returns the L2 norm of a tensor.
def grad_clip_joint(loss_joint, max_grad_norm, scope_list_joint):
    '''
    Clip the gradients of several losses with a single global norm.

    :param loss_joint: [loss1, loss2, loss3, ...]
    :param max_grad_norm: maximum global norm; None disables clipping
    :param scope_list_joint: [scope_list1 (variable scopes for loss1), scope_list2, ...]
    :return: a list of (grad, var) lists, one per loss, and the per-loss gradient norms
    '''
    grads_joint = []
    params_list_joint = []
    seg_points = [0]
    for i, loss in enumerate(loss_joint):
        params_list = []
        for scope in scope_list_joint[i]:
            # Collect the trainable variables under each scope.
            params_list += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                             scope=scope)
        grads_joint += tf.gradients(loss, params_list)
        params_list_joint += params_list
        # Store the cumulative offset so the slices below stay aligned
        # when there is more than one loss.
        seg_points.append(len(params_list_joint))
    if max_grad_norm is not None:
        grads_joint, _ = tf.clip_by_global_norm(grads_joint, max_grad_norm)
    grads_joint_return = []
    global_norm_return = []
    for i in range(len(seg_points) - 1):
        grads = grads_joint[seg_points[i]:seg_points[i + 1]]
        params_list = params_list_joint[seg_points[i]:seg_points[i + 1]]
        # Norm of this loss's gradients after clipping.
        global_norm = tf.sqrt(sum([l2norm(t)**2 for t in grads]))
        grads = list(zip(grads, params_list))
        grads_joint_return.append(grads)
        global_norm_return.append(global_norm)
    return grads_joint_return, global_norm_return
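A minimal usage sketch for grad_clip_joint, assuming two placeholder losses (loss_actor, loss_critic) and hypothetical scope names ('actor', 'critic'); each per-loss list of (grad, var) pairs can then be applied with its own optimizer:

# Sketch only: loss_actor, loss_critic and the scope names stand in for
# whatever the surrounding training graph actually defines.
grads_joint, norms_joint = grad_clip_joint(
    [loss_actor, loss_critic],
    max_grad_norm=0.5,
    scope_list_joint=[['actor'], ['critic']])
actor_opt = tf.train.AdamOptimizer(1e-4)
critic_opt = tf.train.AdamOptimizer(1e-3)
train_op = tf.group(actor_opt.apply_gradients(grads_joint[0]),
                    critic_opt.apply_gradients(grads_joint[1]))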
Example 2
def orthogonalize_(i, basis, v):
  v_norm = util.l2norm(v)
  v_new, v_new_norm = orthogonalize_once(i, basis, v)
  # If the norm decreases more than 1/sqrt(2), run a second
  # round of MGS. See proof in:
  #   B. N. Parlett, ``The Symmetric Eigenvalue Problem'',
  #   Prentice-Hall, Englewood Cliffs, NJ, 1980. pp. 105-109
  return tf.cond(v_new_norm < 0.7071 * v_norm,
                 lambda: orthogonalize_once(i, basis, v),
                 lambda: (v_new, v_new_norm))
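The 0.7071 threshold above is the classic "twice is enough" rule for modified Gram-Schmidt: if orthogonalization shrinks the vector by more than a factor of 1/sqrt(2), too much of it lay in the span of the existing basis and a second pass is warranted. A self-contained NumPy sketch of the same criterion (mgs_once and orthogonalize are illustrative names, not part of the snippet above):

import numpy as np

def mgs_once(basis, v):
    # One pass of modified Gram-Schmidt: subtract the projection of v
    # onto each basis vector in turn, then return the result and its norm.
    for u in basis:
        v = v - np.dot(u, v) * u
    return v, np.linalg.norm(v)

def orthogonalize(basis, v):
    v_norm = np.linalg.norm(v)
    v_new, v_new_norm = mgs_once(basis, v)
    # Same criterion as the TensorFlow snippet: a drop below
    # 1/sqrt(2) of the original norm triggers a second pass.
    if v_new_norm < 0.7071 * v_norm:
        v_new, v_new_norm = mgs_once(basis, v_new)
    return v_new, v_new_norm

# Usage: orthogonalize a vector against two orthonormal basis vectors.
basis = [np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 0.0])]
v = np.array([1.0, 1.0, 1e-8])
v_orth, norm = orthogonalize(basis, v)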
Example 3
def testL2Norm(self):
  with self.test_session():
    x_np = np.array([[2], [-3.], [5.]])
    x_norm_np = np.linalg.norm(x_np)
    x_normalized_np = x_np / x_norm_np
    x = constant_op.constant(x_np)
    l2norm = util.l2norm(x)
    l2norm_squared = util.l2norm_squared(x)
    x_normalized, x_norm = util.l2normalize(x)
    self.assertAllClose(l2norm.eval(), x_norm_np)
    self.assertAllClose(l2norm_squared.eval(), np.square(x_norm_np))
    self.assertAllClose(x_norm.eval(), x_norm_np)
    self.assertAllClose(x_normalized.eval(), x_normalized_np)
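The three helpers exercised by this test can be written in plain TensorFlow roughly as follows; this is a sketch of equivalent definitions consistent with the assertions above, not the library's actual source:

import tensorflow as tf

def l2norm_squared(v):
    # Sum of squared entries over the whole tensor.
    return tf.reduce_sum(tf.square(v))

def l2norm(v):
    return tf.sqrt(l2norm_squared(v))

def l2normalize(v):
    # Returns the unit-norm tensor together with its original norm.
    norm = l2norm(v)
    return v / norm, norm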
Example 4
def grad_clip(loss, max_grad_norm, scope_list):
    '''
    Clip the gradients of a single loss by their global norm.

    :param loss: the loss tensor to differentiate
    :param max_grad_norm: maximum global norm; None disables clipping
    :param scope_list: a list of variable scopes whose trainable variables are updated
    :return: a list of (grad, var) pairs and the gradient norm after clipping
    '''
    params_list = []
    for scope in scope_list:
        # Collect the trainable variables under each scope.
        params_list += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
    grads = tf.gradients(loss, params_list)
    # for i, grad in enumerate(grads):
    #     if grad is None:
    #         grads[i] = tf.zeros(shape=params_list[i].get_shape(), dtype=params_list[i].dtype)
    global_norm = 0.
    if max_grad_norm is not None:
        grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
        # Norm of the gradients after clipping.
        global_norm = tf.sqrt(sum([l2norm(t)**2 for t in grads]))
    grads = list(zip(grads, params_list))
    return grads, global_norm
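A possible way to consume the return values (a sketch; total_loss, the scope name 'model', and the optimizer settings are placeholders): apply the clipped pairs with a single optimizer and log the post-clipping norm.

grads_and_vars, grad_norm = grad_clip(total_loss, max_grad_norm=5.0,
                                      scope_list=['model'])
train_op = tf.train.RMSPropOptimizer(7e-4).apply_gradients(grads_and_vars)
tf.summary.scalar('grad_norm', grad_norm)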