def test_procrustes_rotation_translation():
    """Check that a rotation combined with a reflection is recovered.

    The same transformation is expected from the raw arrays, from the
    translated arrays, and from the translated-and-scaled arrays.
    """
    points = np.array([[-7.3, 2.8], [-7.1, -0.2], [4.0, 1.4], [1.3, 0]])
    # rotate by 20 degrees (0.34907 rad), then reflect in the x-axis
    angle = 0.34907
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    rotate = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    reflect = np.array([[1, 0], [0, -1]])
    expected_u = np.dot(rotate, reflect)
    target = np.dot(points, expected_u)

    # neither translation nor scaling: the inputs come back unchanged
    out_a, out_b, found_u, _ = orthogonal(points, target)
    assert_almost_equal(out_a, points, decimal=6)
    assert_almost_equal(out_b, target, decimal=6)
    assert_almost_equal(found_u, expected_u, decimal=6)
    assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)

    # translation only: both outputs are centred on their column means
    out_a, out_b, found_u, _ = orthogonal(points, target, translate=True)
    assert_almost_equal(out_a, points - np.mean(points, axis=0), decimal=6)
    assert_almost_equal(out_b, target - np.mean(target, axis=0), decimal=6)
    assert_almost_equal(found_u, expected_u, decimal=6)
    assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)

    # translation and scaling: only the transformation and error are checked
    out_a, out_b, found_u, _ = orthogonal(points, target,
                                          translate=True, scale=True)
    assert_almost_equal(found_u, expected_u, decimal=6)
    assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)
def test_procrustes_reflection_square():
    """Check that reflections of a square array are recovered exactly."""
    source = np.array([[2.0, 0.1], [0.5, 3.0]])

    # reflection through the origin; here the returned arrays are also
    # compared against the inputs
    target = -source
    out_a, out_b, found_u, _ = orthogonal(source, target)
    assert_almost_equal(out_a, source, decimal=6)
    assert_almost_equal(out_b, target, decimal=6)
    assert_almost_equal(found_u, np.array([[-1, 0], [0, -1]]), decimal=6)
    assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)

    # (reflected array, expected transformation) for the remaining mirrors
    mirrored_cases = (
        # reflection in the x-axis
        (np.array([[2.0, -0.1], [0.5, -3.0]]), np.array([[1, 0], [0, -1]])),
        # reflection in the y-axis
        (np.array([[-2.0, 0.1], [-0.5, 3.0]]), np.array([[-1, 0], [0, 1]])),
        # reflection in the line y=x
        (np.array([[0.1, 2.0], [3.0, 0.5]]), np.array([[0, 1], [1, 0]])),
    )
    for target, expected_u in mirrored_cases:
        out_a, out_b, found_u, _ = orthogonal(source, target)
        assert_almost_equal(found_u, expected_u, decimal=6)
        assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)
def test_procrustes_orthogonal_identical():
    """Check orthogonal Procrustes on pairs of identical arrays.

    For every pair the returned arrays must match the inputs and the error
    must vanish. The transformation itself is not compared against the
    identity; only its shape is checked for the rectangular inputs.
    """
    # (array, expected shape of the transformation or None to skip the check)
    cases = (
        (np.arange(9).reshape(3, 3), None),               # square
        (np.array([[1, 5, 6, 7], [1, 2, 9, 4]]), (4, 4)),  # rectangular 2 by 4
        (np.arange(15).reshape(5, 3), (3, 3)),            # rectangular 5 by 3
    )
    for reference, expected_shape in cases:
        duplicate = np.copy(reference)
        out_a, out_b, found_u, _ = orthogonal(reference, duplicate)
        assert_almost_equal(out_a, reference, decimal=6)
        assert_almost_equal(out_b, duplicate, decimal=6)
        if expected_shape is not None:
            assert_equal(found_u.shape, expected_shape)
        assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)
def test_procrustes_shifted():
    """Check that ``translate=True`` removes constant and per-column shifts.

    ``array_b`` is always ``array_a`` plus a shift that is constant within
    each column, so after centring both arrays on their column means they
    must coincide with the centred ``array_a``.
    """
    # square array
    array_a = np.array([[3.5, 0.1, 7.0], [0.5, 2.0, 1.0], [8.1, 0.3, 0.7]])
    expected_a = array_a - np.mean(array_a, axis=0)
    # constant shift
    array_b = array_a + 4.1
    new_a, new_b, array_u, _ = orthogonal(array_a, array_b, translate=True)
    assert_almost_equal(new_a, expected_a, decimal=6)
    assert_almost_equal(new_b, expected_a, decimal=6)
    assert_almost_equal(array_u, np.eye(3), decimal=6)
    assert_almost_equal(error(new_a, new_b, array_u), 0., decimal=6)
    # different shift along each axis
    array_b = array_a + np.array([0, 3.2, 5.0])
    new_a, new_b, array_u, _ = orthogonal(array_a, array_b, translate=True)
    assert_almost_equal(new_a, expected_a, decimal=6)
    assert_almost_equal(new_b, expected_a, decimal=6)
    assert_almost_equal(array_u, np.eye(3), decimal=6)
    assert_almost_equal(error(new_a, new_b, array_u), 0., decimal=6)

    # rectangular (2 by 3); the column means are 4, 5.5 and 4
    array_a = np.array([[1, 2, 3], [7, 9, 5]])
    expected_a = array_a - np.array([4., 5.5, 4.])
    # constant shift
    array_b = array_a + 0.71
    new_a, new_b, array_u, _ = orthogonal(array_a, array_b, translate=True)
    assert_almost_equal(new_a, expected_a, decimal=6)
    assert_almost_equal(new_b, expected_a, decimal=6)
    assert_almost_equal(error(new_a, new_b, array_u), 0., decimal=6)
    # different shift along each axis
    array_b = array_a + np.array([0.3, 7.1, 4.2])
    new_a, new_b, array_u, _ = orthogonal(array_a, array_b, translate=True)
    assert_almost_equal(new_a, expected_a, decimal=6)
    assert_almost_equal(new_b, expected_a, decimal=6)
    assert_equal(array_u.shape, (3, 3))
    assert_almost_equal(error(new_a, new_b, array_u), 0., decimal=6)
def test_optimal_heuristic():
    """Check ``optimal_heuristic`` on a 5x5 example.

    With ``k_opt=3`` the initial permutation guess must be turned into the
    known exact permutation with zero error; ``k_opt=1`` must be rejected
    with a ``ValueError``.
    """
    matrix_a = np.array([[3, 6, 1, 0, 7],
                         [4, 5, 2, 7, 6],
                         [8, 6, 6, 1, 7],
                         [4, 4, 7, 9, 4],
                         [4, 8, 0, 3, 1]])
    matrix_b = np.array([[1, 8, 0, 4, 3],
                         [6, 5, 2, 4, 7],
                         [7, 6, 6, 8, 1],
                         [7, 6, 1, 3, 0],
                         [4, 4, 7, 4, 9]])
    start_perm = np.array([[0, 0, 1, 0, 0],
                           [1, 0, 0, 0, 0],
                           [0, 0, 0, 1, 0],
                           [0, 0, 0, 0, 1],
                           [0, 1, 0, 0, 0]])
    target_perm = np.array([[0, 0, 0, 1, 0],
                            [0, 1, 0, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 0, 0, 0, 1],
                            [1, 0, 0, 0, 0]])
    start_error = error(matrix_a, matrix_b, start_perm, start_perm)

    # the heuristic must reach the exact permutation
    refined_perm, refined_error = optimal_heuristic(
        perm=start_perm, A=matrix_a, B=matrix_b,
        ref_error=start_error, k_opt=3)
    np.testing.assert_equal(refined_perm, target_perm)
    assert refined_error == 0

    # an invalid k_opt must be reported as an error
    with np.testing.assert_raises(ValueError):
        optimal_heuristic(perm=start_perm, A=matrix_a, B=matrix_b,
                          ref_error=start_error, k_opt=1)
def _2sided_1trans_exact(A, B, tol):
    r"""
    Find a two-sided single-transformation matrix by exhaustive sign search.

    Every diagonal sign matrix :math:`\mathbf{S}` with entries :math:`\pm 1`
    is tried, the candidate :math:`\mathbf{U} = \mathbf{U}_A \mathbf{S}
    \mathbf{U}_B^T` is built, and the candidate with the smallest
    ``error(A, B, U, U)`` is kept. The search visits :math:`2^n` candidates
    for :math:`n` = ``A.shape[0]``, so it is only practical for small arrays.

    Parameters
    ----------
    A : ndarray
        The 2d-array to be transformed.
    B : ndarray
        The 2d-array representing the reference.
    tol : float
        Not used by this implementation; kept so that the call signature
        stays unchanged.

    Returns
    -------
    U_opt : ndarray
        The candidate transformation with the smallest error. If several
        candidates tie, the first one in enumeration order is returned.
    e_opt : float
        The error of ``U_opt`` as computed by ``error(A, B, U_opt, U_opt)``.

    """
    # Only the second value returned by ``eigendecomposition`` is used.
    # NOTE(review): presumably the eigenvector matrix -- confirm against the
    # helper's definition.
    _, UA = eigendecomposition(A)
    _, UB = eigendecomposition(B)
    # 2^n trial-and-error test to find optimum S array. Only the sign tuples
    # are stored (n numbers each), not the dense n-by-n diagonal matrices.
    sign_choices = list(product((-1, 1.), repeat=A.shape[0]))
    error_list = []
    for signs in sign_choices:
        U = np.dot(np.dot(UA, np.diag(signs)), UB.T)
        error_list.append(error(A, B, U, U))
    index = np.argmin(error_list)
    U_opt = np.dot(np.dot(UA, np.diag(sign_choices[index])), UB.T)
    # the error of the winning candidate was already computed in the loop
    e_opt = error_list[index]
    return U_opt, e_opt
def test_rotation_translate_scale():
    """Recover a rotation plus reflection from a scaled and shifted array."""
    points = np.array([[5.1, 0], [-1.1, 4.8], [3.9, 7.3], [9.1, 6.3]])
    # rotate by 68 degrees (1.18682 rad), then reflect in the line Y=X
    angle = 1.18682
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    rotate = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    swap_axes = np.array([[0, 1], [1, 0]])
    expected_u = np.dot(rotate, swap_axes)
    # the reference is built from a scaled (x4) and shifted (+3) copy
    target = np.dot(4 * points + 3.0, expected_u)

    # procrustes with translation and scaling undoes the shift and the scale
    out_a, out_b, found_u, _ = orthogonal(points, target,
                                          translate=True, scale=True)
    assert_almost_equal(found_u, expected_u, decimal=6)
    assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)
def test_procrustes_orthogonal_translate_scale2():
    """Recover a transformation when the reference is zero-padded.

    The reference is the transformed 2x2 array embedded in the top-left
    corner of a 7x7 array of zeros; translation and scaling are disabled.
    """
    small = np.array([[1, 4], [7, 9]])
    # transformation: rotation by 90 degrees followed by an x-axis reflection
    angle = np.pi / 2
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    rot = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    ref = np.array([[1, 0], [0, -1]])
    expected_u = np.dot(rot, ref)

    # pad the transformed array with five zero columns and five zero rows
    padded = np.zeros((7, 7))
    padded[:2, :2] = np.dot(small, expected_u)

    # compute procrustes transformation
    out_a, out_b, found_u, _ = orthogonal(small, padded,
                                          translate=False, scale=False)
    assert_almost_equal(found_u, expected_u, decimal=6)
    assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)
def _2sided_1trans_exact(array_a, array_b):
    r"""
    Return the best :math:`\mathbf{U}_A \mathbf{S} \mathbf{U}_B^T` candidate.

    The eigenvectors of both arrays are obtained with ``np.linalg.eigh``.
    All :math:`2^n` diagonal sign matrices :math:`\mathbf{S}` (entries
    :math:`\pm 1`, :math:`n` = ``array_a.shape[0]``) are enumerated and the
    candidate with the smallest ``error(array_a, array_b, U, U)`` is
    returned. Ties are resolved in favour of the first candidate found.
    """
    eigvecs_a = np.linalg.eigh(array_a)[1]
    eigvecs_b = np.linalg.eigh(array_b)[1]

    # brute-force search over every sign pattern on the diagonal
    trial_errors = []
    sign_matrices = []
    for signs in product((-1, 1.), repeat=array_a.shape[0]):
        sign_matrix = np.diag(signs)
        candidate = np.dot(np.dot(eigvecs_a, sign_matrix), eigvecs_b.T)
        trial_errors.append(error(array_a, array_b, candidate, candidate))
        sign_matrices.append(sign_matrix)

    best_signs = sign_matrices[np.argmin(trial_errors)]
    return np.dot(np.dot(eigvecs_a, best_signs), eigvecs_b.T)
def test_procrustes_rotation_square():
    """Check that pure rotations of a 2x2 array are recovered."""
    base = np.arange(4).reshape(2, 2)

    def rotation_by(angle):
        # counter-clockwise 2d rotation matrix for an angle in radians
        return np.array([[np.cos(angle), -np.sin(angle)],
                         [np.sin(angle), np.cos(angle)]])

    # (rotated array, expected transformation) with hand-written references
    cases = [
        # rotation by 90 degree
        (np.array([[1, 0], [3, -2]]), np.array([[0., -1.], [1., 0.]])),
        # rotation by 180 degree
        (-base, np.array([[-1., 0.], [0., -1.]])),
        # rotation by 270 degree
        (np.array([[-1, 0], [-3, 2]]), np.array([[0., 1.], [-1., 0.]])),
    ]
    # rotations whose reference array is generated from the matrix itself
    generated = (
        0.5 * np.sqrt(2) * np.array([[1, -1], [1, 1]]),  # 45 degree
        rotation_by(np.pi / 6),                          # 30 degree
        rotation_by(1.25664),                            # 72 degree
    )
    cases.extend((np.dot(base, matrix), matrix) for matrix in generated)

    for target, expected_u in cases:
        out_a, out_b, found_u, _ = orthogonal(base, target)
        assert_almost_equal(found_u, expected_u, decimal=6)
        assert_almost_equal(error(out_a, out_b, found_u), 0., decimal=6)
def permutation_2sided_explicit(array_a, array_b,
                                remove_zero_col=True,
                                remove_zero_row=True,
                                pad_mode="row-col",
                                translate=False,
                                scale=False,
                                check_finite=True):
    r"""
    Two-sided permutation Procrustes by explicit (brute-force) enumeration.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    remove_zero_col : bool, optional
        If True, near zero columns (less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, near zero rows (less than 1e-8) on the bottom will be removed.
        Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin.
        Default=False.
    scale : bool, optional
        If True, both arrays are column normalized to unity. Default=False.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.
        Default=True.

    Returns
    -------
    new_a : ndarray
        The transformed ndarray A.
    new_b : ndarray
        The transformed ndarray B.
    array_p : ndarray
        The permutation matrix with the smallest error. An all-zero matrix is
        returned if no permutation yields an error below infinity.
    e_opt : float
        Two-sided permutation Procrustes error of ``array_p``, as computed by
        ``error(new_a, new_b, array_p, array_p)``.

    Raises
    ------
    ValueError
        If the processed array A is not a square 2d-array. A single permutation
        matrix applied on both sides is only defined for square arrays.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference
    :math:`\mathbf{B}_{n \times n}`, find a permutation of rows/columns of
    :math:`\mathbf{A}_{n \times n}` that makes it as close as possible to
    :math:`\mathbf{B}_{n \times n}`. Be careful: this is a brute-force loop over all
    :math:`n!` permutation matrices which returns the one that gives the minimum
    error (distance). This method can be used as a checker for small datasets.

    """
    print("Warning: This brute-strength method is computational expensive! \n"
          "But it can be used as a checker for a small dataset.")
    # check inputs
    new_a, new_b = setup_input_arrays(array_a, array_b, remove_zero_col,
                                      remove_zero_row, pad_mode, translate,
                                      scale, check_finite)
    shape = np.shape(new_a)
    # The permutations index the rows while the permutation matrix multiplies
    # the columns, so both dimensions have to agree; fail early and clearly
    # instead of with an indexing error inside the loop.
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError(
            "Two-sided permutation Procrustes with a single transformation "
            "requires square arrays, got an array of shape {0}; consider "
            "pad_mode='square'.".format(shape))
    size = shape[0]
    row_index = np.arange(size)

    best_perm = np.zeros(shape)
    best_error = np.inf
    for comb in it.permutations(range(size)):
        # Compute the permutation matrix: row i has its one in column comb[i]
        trial_perm = np.zeros((size, size))
        trial_perm[row_index, comb] = 1
        trial_error = error(new_a, new_b, trial_perm, trial_perm)
        # strict comparison keeps the first permutation among ties
        if trial_error < best_error:
            best_error = trial_error
            best_perm = trial_perm
    return new_a, new_b, best_perm, best_error
def _2sided_regular(array_m, array_n, tol, iteration):
    r"""
    Regular two-sided permutation Procrustes with two transformations.

    Minimizes :math:`\|\mathbf{M} - \mathbf{P}\mathbf{N}\mathbf{Q}\|_F^2` by
    alternately updating :math:`\mathbf{P}` and :math:`\mathbf{Q}`. Taken from
    page 64 of "Parallel solution of SVD-related problems, with
    applications", Pythagoras Papadimitriou, University of Manchester, 1993.

    The alternating scheme is run twice, once starting from
    :math:`\mathbf{P} = \mathbf{I}` and once starting from
    :math:`\mathbf{Q} = \mathbf{I}`, and the result with the smaller error is
    returned (the first run wins a tie).

    Parameters
    ----------
    array_m : ndarray
        The first 2d-array.
    array_n : ndarray
        The second 2d-array.
    tol : float
        A run stops as soon as its error is not larger than this value.
    iteration : int
        Maximum number of update sweeps per run. A message is printed when a
        run uses all of them.

    Returns
    -------
    array_p : ndarray
        Left transformation of the better run.
    array_q : ndarray
        Right transformation of the better run.
    e_opt : float
        Error of the better run, computed as
        ``error(array_n, array_m, array_p.T, array_q)``.

    """
    # Each update is delegated to ``_2sided_hungarian``.
    # NOTE(review): presumably it returns the permutation matrix maximizing
    # the trace with its argument -- confirm against the helper.

    # ---- First run: fix P = I first
    # Initial guess for P
    array_p1 = np.eye(array_m.shape[0], array_m.shape[0])
    # Initial guess for Q
    array_q1 = _2sided_hungarian(np.dot(array_n.T, array_m))
    e_opt1 = error(array_n, array_m, array_p1.T, array_q1)
    step1 = 0

    # while loop for the original algorithm
    while e_opt1 > tol and step1 < iteration:
        step1 += 1
        # Update P
        array_p1 = _2sided_hungarian(
            np.dot(np.dot(array_n, array_q1), array_m.T))
        array_p1 = np.transpose(array_p1)
        # Update the error
        e_opt1 = error(array_n, array_m, array_p1.T, array_q1)
        if e_opt1 > tol:
            # Update Q
            array_q1 = _2sided_hungarian(
                np.dot(np.dot(array_n.T, array_p1.T), array_m))
            # Update the error
            e_opt1 = error(array_n, array_m, array_p1.T, array_q1)
        else:
            break

    if step1 == iteration:
        print("Maximum iteration reached in the first case! Error={0}".
              format(e_opt1))

    # ---- Second run: fix Q = I first
    # Initial guess for Q
    array_q2 = np.eye(array_m.shape[1], array_m.shape[1])
    # Initial guess for P
    array_p2 = _2sided_hungarian(np.dot(array_n, array_m.T))
    array_p2 = np.transpose(array_p2)
    e_opt2 = error(array_n, array_m, array_p2.T, array_q2)
    step2 = 0

    # while loop for the original algorithm
    while e_opt2 > tol and step2 < iteration:
        # Update Q
        array_q2 = _2sided_hungarian(
            np.dot(np.dot(array_n.T, array_p2.T), array_m))
        # Update the error; it must be evaluated with this run's own Q
        # (array_q2), not with the Q left over from the first run.
        e_opt2 = error(array_n, array_m, array_p2.T, array_q2)
        if e_opt2 > tol:
            # Update P
            array_p2 = _2sided_hungarian(
                np.dot(np.dot(array_n, array_q2), array_m.T))
            array_p2 = np.transpose(array_p2)
            # Update the error
            e_opt2 = error(array_n, array_m, array_p2.T, array_q2)
            step2 += 1
        else:
            break

    if step2 == iteration:
        print("Maximum iteration reached in the second case! Error={0}".
              format(e_opt2))

    # keep the run with the smaller error
    if e_opt1 <= e_opt2:
        array_p = array_p1
        array_q = array_q1
        e_opt = e_opt1
    else:
        array_p = array_p2
        array_q = array_q2
        e_opt = e_opt2

    return array_p, array_q, e_opt
def permutation(array_a, array_b,
                remove_zero_col=True,
                remove_zero_row=True,
                pad_mode="row-col",
                translate=False,
                scale=False,
                check_finite=True):
    r"""
    Single sided permutation Procrustes.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    remove_zero_col : bool, optional
        If True, the zero columns on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, the zero rows will be removed. Default=True.
        NOTE(review): this text used to say "on the top" while sibling
        functions say "on the bottom" -- confirm against ``setup_input_arrays``.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, ie columns of the arrays
        will have mean zero. Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`. Default=False.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.
        Default=True.

    Returns
    -------
    new_a : ndarray
        The transformed ndarray A.
    new_b : ndarray
        The transformed ndarray B.
    array_u : ndarray
        The optimum permutation transformation matrix.
    e_opt : float
        One-sided permutation Procrustes error.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and reference :math:`\mathbf{B}_{n \times n}`
    find a permutation of the rows and/or columns of :math:`\mathbf{A}_{n \times n}` that makes
    it as close as possible to :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                            \|\mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\left(\mathbf{A}\mathbf{P} - \mathbf{B} \right)^\dagger
                   \left(\mathbf{A}\mathbf{P} - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right]

    Here, :math:`\mathbf{P}_{n \times n}` is the permutation matrix. The solution is to relax
    the problem into a linear programming problem and note that the solution to a linear
    programming problem is always at the boundary of the allowed region, which means that the
    solution can always be written as a permutation matrix,

    .. math::
       \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                       \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                       \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right] =
       \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \geq 0
                       \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                       \text{Tr}\left[\mathbf{P}^\dagger\left(\mathbf{A}^\dagger\mathbf{B}\right)
                       \right]

    This is a matching problem and can be solved by the Hungarian method. Note that if
    :math:`\mathbf{A}` and :math:`\mathbf{B}` have different numbers of items, you choose
    the larger matrix as :math:`\mathbf{B}` and then pad :math:`\mathbf{A}` with rows/columns
    of zeros.

    """
    # validate and pre-process both arrays
    new_a, new_b = setup_input_arrays(array_a, array_b, remove_zero_col,
                                      remove_zero_row, pad_mode, translate,
                                      scale, check_finite)

    # The assignment solver minimizes, whereas Tr[P^T A^T B] has to be
    # maximized; subtracting the overlap from its largest entry turns the
    # maximization into an equivalent minimization with non-negative costs.
    overlap = np.dot(new_a.T, new_b)
    cost = np.max(overlap) - overlap

    # put a one at every (row, column) pair chosen by the Hungarian algorithm
    # (linear_sum_assignment)
    row_index, col_index = linear_sum_assignment(cost)
    array_u = np.zeros(overlap.shape)
    array_u[row_index, col_index] = 1

    e_opt = error(new_a, new_b, array_u)
    return new_a, new_b, array_u, e_opt
def permutation_2sided(array_a, array_b, transform_mode="single_undirected",
                       remove_zero_col=True, remove_zero_row=True,
                       pad_mode="row-col", translate=False, scale=False,
                       mode="normal1", check_finite=True, iteration=500,
                       add_noise=False, tol=1.0e-8):
    r"""
    Two-sided permutation Procrustes.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    transform_mode : str
        Selects the algorithm; the value is compared case-insensitively.
        If transform_mode="single_undirected", two-sided permutation Procrustes with one
        transformation will be performed. If transform_mode="single_directed", two-sided
        permutation for directed graph matching will be used. If transform_mode="double",
        the two-sided permutation Procrustes with two transformations will be performed.
        Default="single_undirected".
    remove_zero_col : bool, optional
        If True, zero columns (values less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
        Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, ie columns of the arrays
        will have mean zero. Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`. Default=False.
    mode : string, optional
        Option for choosing the initial guess methods, including "normal1", "normal2",
        "umeyama" and "umeyama_approx". "umeyama_approx" is the approximated umeyama method.
        Only used when transform_mode="single_undirected". Default="normal1".
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.
        Default=True.
    iteration : int, optional
        Maximum number for iterations. Default=500.
    add_noise : bool, optional
        Add small noise if the arrays are non-diagonalizable. Only used when
        transform_mode="single_undirected". Default=False.
    tol : float, optional
        The tolerance value used for updating the initial guess. Default=1.e-8

    Returns
    -------
    new_a : ndarray
        The transformed ndarray A. Note that it is shifted by the largest absolute
        element of the processed arrays so that it is non-negative.
    new_b : ndarray
        The transformed ndarray B, shifted in the same way as ``new_a``.
    array_u : ndarray
        The optimum permutation transformation matrix. Only returned for the
        "single_undirected" and "single_directed" transform modes.
    array_p : ndarray
        The optimum permutation transformation matrix when using double transform mode.
    array_q : ndarray
        The optimum permutation transformation matrix when using double transform mode.
    e_opt : float
        Two-sided permutation Procrustes error.

    The single transform modes return the 4-tuple ``(new_a, new_b, array_u, e_opt)``;
    the double transform mode returns the 5-tuple
    ``(new_a, new_b, array_p, array_q, e_opt)``.

    Raises
    ------
    ValueError
        If ``transform_mode`` is not one of the supported values.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference
    :math:`\mathbf{B}_{n \times n}`, find a permutation of rows/columns of
    :math:`\mathbf{A}_{n \times n}` that makes it as close as possible to
    :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
        &\underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \|\mathbf{P}^\dagger \mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2\\
        = &\underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\left(\mathbf{P}^\dagger\mathbf{A}\mathbf{P} - \mathbf{B} \right)^\dagger
             \left(\mathbf{P}^\dagger\mathbf{A}\mathbf{P} - \mathbf{B} \right)\right] \\
        = &\underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{P}\mathbf{B} \right]\\

    Here, :math:`\mathbf{P}_{n \times n}` is the permutation matrix. Given an initial guess,
    the best local minimum can be obtained by the iterative procedure,

    .. math::
       p_{ij}^{(n + 1)} = p_{ij}^{(n)} \sqrt{ \frac{2\left[\mathbf{T}^{(n)}\right]_{ij}}{\left[
                          \mathbf{P}^{(n)} \left( \left(\mathbf{P}^{(n)}\right)^T \mathbf{T} +
                          \left( \left(\mathbf{P}^{(n)}\right)^T \mathbf{T} \right)^T \right)
                          \right]_{ij}} }

    where,

    .. math::
       \mathbf{T}^{(n)} = \mathbf{A} \mathbf{P}^{(n)} \mathbf{B}

    Using an initial guess, the iteration stops when the change in :math:`d` is below the
    specified threshold,

    .. math::
       d = \text{Tr} \left[\left(\mathbf{P}^{(n+1)} -\mathbf{P}^{(n)} \right)^T
                           \left(\mathbf{P}^{(n+1)} -\mathbf{P}^{(n)} \right)\right]

    The outcome of the iterative procedure :math:`\mathbf{P}^{(\infty)}` is not a permutation
    matrix. The closest permutation is found with the one-sided permutation Procrustes;
    that is,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                       \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                       \|\mathbf{P} - \mathbf{P}^{(\infty)}\|_{F}^2
       = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                       \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                       \text{Tr}\left[\mathbf{P}^\dagger\mathbf{P}^{(\infty)} \right]

    NOTE(review): earlier versions of this text referred to a ``refinement=True`` argument,
    which this function does not have; confirm that the iterative helper always performs
    this last step.

    The answer to this problem is a heuristic solution for the matrix-matching problem that
    seems to be relatively accurate.

    **Initial Guess:**

    Two possible initial guesses are inferred from the Umeyama procedure. One can find either
    the closest permutation matrix to :math:`\mathbf{U}_\text{Umeyama}` or to
    :math:`\mathbf{U}_\text{Umeyama}^\text{approx.}`. The resulting permutation matrix can be
    selected as initial guess through ``mode="umeyama"`` and ``mode="umeyama_approx"``, which
    solve:

    .. math::
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger\mathbf{U}_\text{Umeyama} \right] \\
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger\mathbf{U}_\text{Umeyama}^\text{approx.} \right]

    Another choice is to start by solving a normal permutation Procrustes problem. In other
    words, write new matrices, :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0`, with columns like,

    .. math::
       \begin{bmatrix}
        a_{ii} \\
        p \cdot \text{sgn}\left( a_{ij_\text{max}} \right)
                \underbrace{\text{max}}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        p^2 \cdot \text{sgn}\left( a_{ij_{\text{max}-1}} \right)
                  \underbrace{\text{max}-1}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        \vdots
       \end{bmatrix}

    Here, :math:`\text{max}-1` denotes the second-largest absolute value of elements,
    :math:`\text{max}-2` is the third-largest absolute value of elements, etc.

    The matrices :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` have the diagonal elements of
    :math:`\mathbf{A}` and :math:`\mathbf{B}` in the first row, and below the first row has the
    largest off-diagonal element in row :math:`i`, the second-largest off-diagonal element,
    etc. The elements are weighted by a factor :math:`0 < p < 1`, so that the smaller elements
    are considered less important for matching. The matrices can be truncated after a few
    terms; for example, after the size of elements falls below some threshold. A reasonable
    choice would be to stop after :math:`\lfloor \frac{-2\ln 10}{\ln p} +1\rfloor` rows; this
    ensures that the size of the elements in the last row is less than 1% of those in the
    first off-diagonal row.

    There are obviously many different ways to construct the matrices :math:`\mathbf{A}^0`
    and :math:`\mathbf{B}^0`. Another, even better, method would be to try to encode not only
    what the off-diagonal elements are, but which element in the matrix they correspond to.
    One could do that by not only listing the diagonal elements, but also listing the
    associated off-diagonal element. I.e., the columns of :math:`\mathbf{A}^0` and
    :math:`\mathbf{B}^0` would be,

    .. math::
       \begin{bmatrix}
        a_{ii} \\
        p \cdot a_{j_\text{max} j_\text{max}} \\
        p \cdot \text{sgn}\left( a_{ij_\text{max}} \right)
                \underbrace{\text{max}}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        p^2 \cdot a_{j_{\text{max}-1} j_{\text{max}-1}} \\
        p^2 \cdot \text{sgn}\left( a_{ij_{\text{max}-1}} \right)
                  \underbrace{\text{max}-1}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        \vdots
       \end{bmatrix}

    In this case, you would stop the procedure after
    :math:`m = \left\lfloor {\frac{{ - 4\ln 10}}{{\ln p}} + 1} \right \rfloor` rows.

    Then one uses the one-sided permutation Procrustes to match the constructed matrices
    :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` instead of :math:`\mathbf{A}` and
    :math:`\mathbf{B}`. I.e.,

    .. math::
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger \left(\mathbf{A^0}^\dagger\mathbf{B^0}\right)\right]

    Please note that "umeyama_approx" might give an inaccurate permutation matrix. More
    specifically, it is an approximated Umeyama method. One example is the computation of the
    permutation matrix that transforms :math:`A` to :math:`B` below: the "umeyama_approx"
    method can not give the exact permutation transformation matrix while "umeyama",
    "normal1" and "normal2" do.

    .. math::
        A =
        \begin{bmatrix}
         4 & 5 & -3 & 3 \\
         5 & 7 & 3 & -5 \\
         -3 & 3 & 2 & 2 \\
         3 & -5 & 2 & 5 \\
        \end{bmatrix} \\
        B =
        \begin{bmatrix}
         73 & 100 & 73 & -62 \\
         100 & 208 & -116 & 154 \\
         73 & -116 & 154 & 100 \\
         -62 & 154 & 100 & 127 \\
        \end{bmatrix} \\

    """
    # check inputs
    new_a, new_b = setup_input_arrays(array_a, array_b, remove_zero_col,
                                      remove_zero_row, pad_mode, translate,
                                      scale, check_finite)
    # np.power() can not handle the negatives values
    # Try to convert the matrices to non-negative
    maximum = max(np.max(np.abs(new_a)), np.max(np.abs(new_b)))
    # Shift out-of-place: an in-place ``+=`` would modify the caller's arrays
    # whenever the input set-up hands them through unchanged, and it fails
    # when an integer array is shifted by a floating point maximum.
    new_a = new_a + maximum
    new_b = new_b + maximum

    transform_mode = transform_mode.lower()

    # Do single-transformation computation if requested
    if transform_mode == "single_undirected":
        # the initial guess
        guess = _guess_initial_permutation(new_a, new_b, mode, add_noise)
        # Compute the permutation matrix by iterations
        array_u = _compute_transform(new_a, new_b, guess, tol, iteration)
        e_opt = error(new_a, new_b, array_u, array_u)
        return new_a, new_b, array_u, e_opt

    if transform_mode == "single_directed":
        # the initial guess
        guess = _2sided_1trans_initial_guess_directed(new_a, new_b)
        # Compute the permutation matrix by iterations
        array_u = _compute_transform_directed(new_a, new_b, guess, tol,
                                              iteration)
        e_opt = error(new_a, new_b, array_u, array_u)
        return new_a, new_b, array_u, e_opt

    # Do regular computation
    if transform_mode == "double":
        array_p, array_q, e_opt = _2sided_regular(new_a, new_b, tol, iteration)
        return new_a, new_b, array_p, array_q, e_opt

    raise ValueError(""" Invalid transform_mode argument, use "single_undirected", "single_directed", or "double" """)
def rotational(A, B, remove_zero_col=True, remove_zero_row=True,
               pad_mode='row-col', translate=False, scale=False,
               check_finite=True):
    r"""
    Compute optimal rotational-orthogonal transformation array.

    The Procrustes analysis requires two 2d-arrays with the same number of rows, so the
    array with the smaller number of rows will automatically be padded with zero rows.

    Parameters
    ----------
    A : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    B : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference array.
    remove_zero_col : bool, optional
        If True, the zero columns on the right side will be removed.
        Default= True.
    remove_zero_row : bool, optional
        If True, the zero rows on the top will be removed.
        Default= True.
    pad_mode : str, optional
        Zero padding mode when the sizes of two arrays differ. Default='row-col'.
        'row': The array with fewer rows is padded with zero rows so that both have the same
        number of rows.
        'col': The array with fewer columns is padded with zero columns so that both have the
        same number of columns.
        'row-col': The array with fewer rows is padded with zero rows, and the array with fewer
        columns is padded with zero columns, so that both have the same dimensions.
        This does not necessarily result in square arrays.
        'square': The arrays are padded with zero rows and zero columns so that they are both
        squared arrays. The dimension of square array is specified based on the highest
        dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin.
    scale : bool, optional
        If True, both arrays are column normalized to unity.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.

    Returns
    -------
    A : ndarray
        The transformed ndarray A.
    B : ndarray
        The transformed ndarray B.
    U_opt : ndarray
        The optimum rotation transformation matrix.
    e_opt : float
        One-sided orthogonal Procrustes error.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference
    :math:`\mathbf{B}_{m \times n}`, find the transformation of
    :math:`\mathbf{A}_{m \times n}` that makes it as close as possible to
    :math:`\mathbf{B}_{m \times n}`. I.e.,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{U} \left| {\mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                          \atop \left| \mathbf{U} \right| = 1} \right. \right\}}
          \|\mathbf{A}\mathbf{U} - \mathbf{B}\|_{F}^2
       &= \underbrace{\min}_{\left\{\mathbf{U} \left| {\mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                          \atop \left| \mathbf{U} \right| = 1} \right. \right\}}
          \text{Tr}\left[\left(\mathbf{A}\mathbf{U} - \mathbf{B} \right)^\dagger
                         \left(\mathbf{A}\mathbf{U} - \mathbf{B} \right)\right] \\
       &= \underbrace{\max}_{\left\{\mathbf{U} \left| {\mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                          \atop \left| \mathbf{U} \right| = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{U}^\dagger {\mathbf{A}}^\dagger \mathbf{B} \right]

    Here, :math:`\mathbf{U}_{n \times n}` is the rotation matrix (orthogonal with unit
    determinant). The solution is obtained by taking the singular value decomposition (SVD)
    of the product of the matrices,

    .. math::
       \mathbf{A}^\dagger \mathbf{B} &= \tilde{\mathbf{U}} \tilde{\mathbf{\Sigma}}
                                          \tilde{\mathbf{V}}^{\dagger} \\
       \mathbf{U}_{\text{optimum}} &= \tilde{\mathbf{U}} \tilde{\mathbf{S}}
                                      \tilde{\mathbf{V}}^{\dagger}

    Where :math:`\tilde{\mathbf{S}}_{n \times n}` is almost an identity matrix,

    .. math::
       \tilde{\mathbf{S}}_{n \times n} \equiv
       \begin{bmatrix}
           1      & 0      & \cdots & 0      & 0      \\
           0      & 1      & \ddots & \vdots & 0      \\
           0      & \ddots & \ddots & 0      & \vdots \\
           \vdots & 0      & 0      & 1      & 0      \\
           0      & 0      & \cdots & 0      & \operatorname{sgn}
               \left(\left|\tilde{\mathbf{U}}\tilde{\mathbf{V}}^\dagger\right|\right)
       \end{bmatrix}

    I.e. the smallest singular value is replaced by

    .. math::
       \operatorname{sgn} \left(\left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right|\right) =
       \begin{cases}
           +1 \qquad \left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right| \geq 0 \\
           -1 \qquad \left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right| < 0
       \end{cases}

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[1.5, 7.4], [8.5, 4.5]])
    >>> array_b = np.array([[6.29325035,  4.17193001, 0., 0,],
    ...                     [9.19238816, -2.82842712, 0., 0.],
    ...                     [0.,          0.,         0., 0.]])
    >>> new_a, new_b, array_u, error_opt = rotational(array_a, array_b,
    ...                                               translate=False,
    ...                                               scale=False)
    >>> array_u # rotational array
    array([[ 0.70710678, -0.70710678],
           [ 0.70710678,  0.70710678]])
    >>> error_opt # error
    1.483808210011695e-17

    """
    # validate the inputs and bring both arrays to matching shapes
    new_a, new_b = _get_input_arrays(A, B, remove_zero_col, remove_zero_row,
                                     pad_mode, translate, scale, check_finite)

    # SVD of the cross product A.T * B; the singular values themselves are not needed
    overlap = np.dot(new_a.T, new_b)
    left, _, right_t = singular_value_decomposition(overlap)

    # identity matrix whose last diagonal entry carries sgn(det(U * V^T)), which
    # forces the determinant of the final transformation to be non-negative
    parity = np.eye(new_a.shape[1])
    parity[-1, -1] = np.sign(np.linalg.det(np.dot(left, right_t)))

    # optimum rotation matrix U * S * V^T
    rotation = np.dot(np.dot(left, parity), right_t)

    # single-sided Procrustes error of the fitted transformation
    residual = error(new_a, new_b, rotation)
    return new_a, new_b, rotation, residual
def orthogonal_2sided(A, B, remove_zero_col=True, remove_zero_row=True,
                      pad_mode='row-col', translate=False, scale=False,
                      single_transform=True, mode="exact", check_finite=True,
                      tol=1.0e-8):
    r"""
    Two-Sided Orthogonal Procrustes.

    Parameters
    ----------
    A : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    B : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference array.
    remove_zero_col : bool, optional
        If True, the zero columns on the right side will be removed.
        Default= True.
    remove_zero_row : bool, optional
        If True, the zero rows on the top will be removed.
        Default= True.
    pad_mode : str, optional
        Zero padding mode when the sizes of two arrays differ. Default='row-col'.
        'row': The array with fewer rows is padded with zero rows so that both have the same
        number of rows.
        'col': The array with fewer columns is padded with zero columns so that both have the
        same number of columns.
        'row-col': The array with fewer rows is padded with zero rows, and the array with fewer
        columns is padded with zero columns, so that both have the same dimensions.
        This does not necessarily result in square arrays.
        'square': The arrays are padded with zero rows and zero columns so that they are both
        squared arrays. The dimension of square array is specified based on the highest
        dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin. A warning is emitted
        because translation is not well defined for the two-sided problem.
        Default=False.
    scale : bool, optional
        If True, both arrays are column normalized to unity.
        Default=False.
    single_transform : bool
        If True, two-sided orthogonal Procrustes with one transformation
        will be performed. Default=True.
    mode : string, optional
        The scheme to solve for unitary transformation (case-insensitive).
        Options: 'exact' and 'approx'. Only used when `single_transform=True`.
        Default="exact".
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.
        Default=True.
    tol : float, optional
        The tolerance value handed to the single-transformation solvers.
        Default=1.e-8.

    Returns
    -------
    A : ndarray
        The transformed ndarray A.
    B : ndarray
        The transformed ndarray B.
    U_opt1 : ndarray
        The optimal orthogonal left-multiplying transformation ndarray; only returned if
        "single_transform=False".
    U_opt2 : ndarray
        The second transformation ndarray; only returned if "single_transform=False".
    U_opt : ndarray
        The transformation ndarray; only returned if "single_transform=True".
    e_opt : float
        The single- or double- sided orthogonal Procrustes error.

    Raises
    ------
    ValueError
        When `single_transform=True` and array `A` or `B` is not symmetric after the zero
        rows/columns were removed and the padding, translation and scaling were applied.
    ValueError
        If the mode is not 'exact' or 'approx' when `single_transform=True`.
    numpy.linalg.LinAlgError
        May propagate from the underlying matrix decompositions.

    Notes
    -----
    **Two-Sided Orthogonal Procrustes:**

    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference
    :math:`\mathbf{B}_{m \times n}`, find two unitary/orthogonal transformations of
    :math:`\mathbf{A}_{m \times n}` that make it as close as possible to
    :math:`\mathbf{B}_{m \times n}`. I.e.,

    .. math::
       \underbrace{\text{min}}_{\left\{ {\mathbf{U}_1 \atop \mathbf{U}_2} \left|
         {\mathbf{U}_1^{-1} = \mathbf{U}_1^\dagger \atop \mathbf{U}_2^{-1} =
         \mathbf{U}_2^\dagger} \right. \right\}}
         \|\mathbf{U}_1^\dagger \mathbf{A} \mathbf{U}_2 - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{ {\mathbf{U}_1 \atop \mathbf{U}_2} \left|
         {\mathbf{U}_1^{-1} = \mathbf{U}_1^\dagger \atop \mathbf{U}_2^{-1} =
         \mathbf{U}_2^\dagger} \right. \right\}}
         \text{Tr}\left[\left(\mathbf{U}_1^\dagger\mathbf{A}\mathbf{U}_2 - \mathbf{B} \right)^\dagger
                        \left(\mathbf{U}_1^\dagger\mathbf{A}\mathbf{U}_2 - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{ {\mathbf{U}_1 \atop \mathbf{U}_2} \left|
         {\mathbf{U}_1^{-1} = \mathbf{U}_1^\dagger \atop \mathbf{U}_2^{-1} =
         \mathbf{U}_2^\dagger} \right. \right\}}
         \text{Tr}\left[\mathbf{U}_2^\dagger\mathbf{A}^\dagger\mathbf{U}_1\mathbf{B} \right]

    We can get the solution by taking singular value decomposition of the matrices. Having,

    .. math::
       \mathbf{A} = \mathbf{U}_A \mathbf{\Sigma}_A \mathbf{V}_A^\dagger \\
       \mathbf{B} = \mathbf{U}_B \mathbf{\Sigma}_B \mathbf{V}_B^\dagger

    The transformation is found by,

    .. math::
       \mathbf{U}_1 = \mathbf{U}_A \mathbf{U}_B^\dagger \\
       \mathbf{U}_2 = \mathbf{V}_A \mathbf{V}_B^\dagger

    **Two-Sided Orthogonal Procrustes with Single-Transformation:**

    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference
    :math:`\mathbf{B}_{n \times n}`, find one unitary/orthogonal transformation matrix
    :math:`\mathbf{U}_{n \times n}` that makes :math:`\mathbf{A}_{n \times n}` as close as
    possible to :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \|\mathbf{U}^\dagger\mathbf{A}\mathbf{U} - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \text{Tr}\left[\left(\mathbf{U}^\dagger\mathbf{A}\mathbf{U} - \mathbf{B} \right)^\dagger
                         \left(\mathbf{U}^\dagger\mathbf{A}\mathbf{U} - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \text{Tr}\left[\mathbf{U}^\dagger\mathbf{A}^\dagger\mathbf{U}\mathbf{B} \right]

    Taking the eigenvalue decomposition of the matrices:

    .. math::
       \mathbf{A} = \mathbf{U}_A \mathbf{\Lambda}_A \mathbf{U}_A^\dagger \\
       \mathbf{B} = \mathbf{U}_B \mathbf{\Lambda}_B \mathbf{U}_B^\dagger

    the solution is obtained by,

    .. math::
       \mathbf{U} = \mathbf{U}_A \mathbf{S} \mathbf{U}_B^\dagger

    where :math:`\mathbf{S}` is a diagonal matrix for which every diagonal element is
    :math:`\pm{1}`,

    .. math::
       \mathbf{S} =
       \begin{bmatrix}
        { \pm 1} & 0       &\cdots &0 \\
        0        &{ \pm 1} &\ddots &\vdots \\
        \vdots   &\ddots   &\ddots &0\\
        0        &\cdots   &0      &{ \pm 1}
       \end{bmatrix}

    Finding the best choice of :math:`\mathbf{S}` requires :math:`2^n` trial-and-error tests.
    This is called the ``exact`` scheme for solving the problem.

    A heuristic, due to Umeyama, is to take the element-wise absolute value of the elements
    of the unitary transformations,

    .. math::
       \mathbf{U}_\text{Umeyama} = \text{abs}(\mathbf{U}_A) \cdot \text{abs}(\mathbf{U}_B^\dagger)

    This is not actually a unitary matrix. But we can use the orthogonal procrustes problem
    to find the closest unitary matrix (i.e., the closest matrix that is unitarily equivalent
    to the identity matrix),

    .. math::
       \underbrace{\min}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \|\mathbf{I}\mathbf{U} - \mathbf{U}_\text{Umeyama}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \text{Tr}\left[\left(\mathbf{U} - \mathbf{U}_\text{Umeyama} \right)^\dagger
                         \left(\mathbf{U} - \mathbf{U}_\text{Umeyama} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \text{Tr}\left[\mathbf{U}^\dagger \mathbf{U}_\text{Umeyama} \right]

    considering the singular value decomposition of :math:`\mathbf{U}_\text{Umeyama}`,

    .. math::
       \mathbf{U}_\text{Umeyama} = \tilde{\mathbf{U}} \tilde{\mathbf{\Sigma}}
                                   \tilde{\mathbf{V}}^\dagger

    the solution is given by,

    .. math::
       \mathbf{U}_\text{Umeyama}^\text{approx} = \tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger

    This is called the ``approx`` scheme for solving the problem.

    Please note that the translation operation is not well defined for two sided orthogonal
    procrustes since two sided rotation and translation don't commute. Therefore, please be
    careful when setting translate=True.

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[30, 33, 20], [33, 53, 43], [20, 43, 46]])
    >>> array_b = np.array([
    ...     [ 22.78131838, -0.58896768,-43.00635291, 0., 0.],
    ...     [ -0.58896768, 16.77132475,  0.24289990, 0., 0.],
    ...     [-43.00635291,  0.2428999 , 89.44735687, 0., 0.],
    ...     [  0.        ,  0.        ,  0.        , 0., 0.]])
    >>> new_a, new_b, array_u, error_opt = orthogonal_2sided(
    ...     array_a, array_b, single_transform=True,
    ...     remove_zero_col=True, remove_zero_row=True, mode='exact')
    >>> array_u
    array([[ 0.25116633,  0.76371527,  0.59468855],
           [-0.95144277,  0.08183302,  0.29674906],
           [ 0.17796663, -0.64034549,  0.74718507]])
    >>> error_opt
    1.9646186414076689e-26

    """
    if translate:
        # Adjacent literals are joined at compile time, so no source indentation
        # leaks into the emitted message.
        warnings.warn("The translation matrix was not well defined. "
                      "Two sided rotation and translation don't commute.",
                      stacklevel=2)

    # Check inputs
    A, B = _get_input_arrays(A, B, remove_zero_col, remove_zero_row,
                             pad_mode, translate, scale, check_finite)

    # Convert mode strings into lowercase
    mode = mode.lower()

    # Do regular two-sided orthogonal Procrustes calculations
    if not single_transform:
        U_opt1, U_opt2 = _2sided(A, B)
        e_opt = error(A, B, U_opt1, U_opt2)
        return A, B, U_opt1, U_opt2, e_opt

    # The single-transformation problem is only defined for symmetric arrays.
    # Validate the processed arrays (the ones the solver receives): the raw inputs
    # may still carry zero padding that makes them non-square, in which case
    # comparing an array with its transpose fails on shape alone.
    for label, array in (('A', A), ('B', B)):
        is_square = array.ndim == 2 and array.shape[0] == array.shape[1]
        if not is_square or not np.allclose(array.T, array):
            raise ValueError('Array {} should be symmetric.'.format(label))

    # Do single-transformation computation
    if mode == "approx":
        U = _2sided_1trans_approx(A, B, tol)
        e_opt = error(A, B, U, U)
    elif mode == "exact":
        # the exact solver already reports the error of its best candidate
        U, e_opt = _2sided_1trans_exact(A, B, tol)
    else:
        raise ValueError("Invalid mode argument (use 'exact' or 'approx')")
    return A, B, U, e_opt
def symmetric(array_a, array_b, remove_zero_col=True, remove_zero_row=True,
              pad_mode='row-col', translate=False, scale=False, check_finite=True):
    r"""
    Symmetric right-sided Procrustes transformation.

    The symmetric Procrustes requires both matrices to have the number of rows greater than
    or equal to the number of columns. Further, it is assumed that each matrix has the same
    dimension, if not padding will occur.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    remove_zero_col : bool, optional
        If True, zero columns (values less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
        Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, ie columns of the arrays
        will have mean zero.
        Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`.
        Default=False.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.
        Default=True.

    Returns
    -------
    new_a : ndarray
        The transformed ndarray array_a.
    new_b : ndarray
        The transformed ndarray array_b.
    array_x : ndarray
        The optimum symmetric transformation array.
    e_opt : float
        One-sided Procrustes error.

    Raises
    ------
    ValueError :
        If the matrix dimensions (m, n) don't satisfy :math:`m \geq n` after padding.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference
    :math:`\mathbf{B}_{m \times n}`, with :math:`m \geqslant n`, find the symmetric matrix
    :math:`\mathbf{X}_{n \times n}` for which :math:`\mathbf{AX}` is as close as possible to
    :math:`\mathbf{B}_{m \times n}`. I.e.,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{X} \left| \mathbf{X} = \mathbf{X}^\dagger
                                \right. \right\}}
          \|\mathbf{A} \mathbf{X} - \mathbf{B}\|_{F}^2 =
       \underbrace{\text{min}}_{\left\{\mathbf{X} \left| \mathbf{X} = \mathbf{X}^\dagger
                                \right. \right\}}
          \text{Tr}\left[\left(\mathbf{A}\mathbf{X} - \mathbf{B} \right)^\dagger
                         \left(\mathbf{A}\mathbf{X} - \mathbf{B} \right)\right]

    Considering the singular value decomposition of :math:`\mathbf{A}_{m \times n}` as

    .. math::
       \mathbf{A}_{m \times n} = \mathbf{U}_{m \times m}
                                 \begin{bmatrix}
                                     \mathbf{\Sigma}_{n \times n} \\
                                     \mathbf{0}_{(m - n) \times n}
                                 \end{bmatrix}
                                 \mathbf{V}_{n \times n}^\dagger

    where :math:`\mathbf{\Sigma}_{n \times n}` is a square diagonal matrix with nonnegative
    elements denoted by :math:`\sigma_i` listed in decreasing order, define

    .. math::
       \mathbf{C}_{m \times n} = \mathbf{U}_{m \times m}^\dagger
                                 \mathbf{B}_{m \times n} \mathbf{V}_{n \times n}

    Then the elements of the optimal matrix :math:`\mathbf{X}_{n \times n}` are obtained
    from :math:`\mathbf{X} = \mathbf{V} \mathbf{Y} \mathbf{V}^\dagger` with

    .. math::
       y_{ij} = \begin{cases}
              0 && i \text{ and } j > \text{rank} \left(\mathbf{A}\right) \\
              \frac{\sigma_i c_{ij} + \sigma_j c_{ji}}{\sigma_i^2 +
              \sigma_j^2} && \text{otherwise}
              \end{cases}

    Notice that the first part of this constraint only applies in the unusual case where
    :math:`\mathbf{A}` has rank less than :math:`n`, i.e. when both :math:`\sigma_i` and
    :math:`\sigma_j` vanish.

    References
    ----------
    1. Higham, Nicholas J. The Symmetric Procrustes problem.
       BIT Numerical Mathematics, 28 (1), 133-143, 1988.

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[5., 2., 8.],
    ...                     [2., 2., 3.],
    ...                     [1., 5., 6.],
    ...                     [7., 3., 2.]])
    >>> array_b = np.array([[ 52284.5, 209138. , 470560.5],
    ...                     [ 22788.5,  91154. , 205096.5],
    ...                     [ 46139.5, 184558. , 415255.5],
    ...                     [ 22788.5,  91154. , 205096.5]])
    >>> new_a, new_b, array_x, error_opt = symmetric(array_a, array_b,
    ...                                              translate=True, scale=True)
    >>> array_x # symmetric transformation array
    array([[0.0166352 , 0.06654081, 0.14971682],
           [0.06654081, 0.26616324, 0.59886729],
           [0.14971682, 0.59886729, 1.34745141]])
    >>> error_opt # error
    4.483083428047388e-31

    """
    # validate the inputs and bring both arrays to matching shapes
    new_a, new_b = setup_input_arrays(array_a, array_b, remove_zero_col, remove_zero_row,
                                      pad_mode, translate, scale, check_finite)

    # both processed arrays need at least as many rows as columns
    n_rows_a, n_cols_a = new_a.shape[0], new_a.shape[1]
    if n_rows_a < n_cols_a:
        raise ValueError("Array A with size (m, n) needs m >= to n.")
    n_rows_b, n_cols_b = new_b.shape[0], new_b.shape[1]
    if n_rows_b < n_cols_b:
        raise ValueError("Array B with size (m, n) needs m >= to n.")

    # full SVD of the processed A, and the reference expressed in its singular bases
    left, sigma, right_t = np.linalg.svd(new_a)
    coupling = np.dot(np.dot(left.T, new_b), right_t.T)

    # intermediate array Y; entries whose two singular values both vanish stay zero
    core = np.zeros((n_cols_a, n_cols_a))
    for i, j in np.ndindex(n_cols_a, n_cols_a):
        weight = sigma[i] ** 2 + sigma[j] ** 2
        if weight != 0:
            mixed = sigma[i] * coupling[i, j] + sigma[j] * coupling[j, i]
            core[i, j] = mixed / weight

    # rotate Y back with V to obtain the optimum symmetric transformation X
    array_x = np.dot(np.dot(right_t.T, core), right_t)
    e_opt = error(new_a, new_b, array_x)
    return new_a, new_b, array_x, e_opt
def orthogonal_2sided(array_a, array_b, remove_zero_col=True, remove_zero_row=True,
                      pad_mode='row-col', translate=False, scale=False,
                      single_transform=True, mode="exact", check_finite=True,
                      tol=1.0e-8):
    r"""
    Two-Sided Orthogonal Procrustes.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference array.
    remove_zero_col : bool, optional
        If True, zero columns (values less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
        Default= True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin. A warning is emitted
        because translation is not well defined for the two-sided problem.
        Default=False.
    scale : bool, optional
        If True, both arrays are column normalized to unity.
        Default=False.
    single_transform : bool
        If True, two-sided orthogonal Procrustes with one transformation
        will be performed. Default=True.
    mode : string, optional
        The scheme to solve for unitary transformation (case-insensitive).
        Options: 'exact' and 'approx'. Only used when `single_transform=True`.
        Default="exact".
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs.
        Default=True.
    tol : float, optional
        The tolerance value used for 'approx' mode.
        Default=1.e-8.

    Returns
    -------
    array_a : ndarray
        The transformed ndarray :math:`A`.
    array_b : ndarray
        The transformed ndarray :math:`B`.
    u_opt1 : ndarray
        The optimal orthogonal left-multiplying transformation ndarray; only returned if
        "single_transform=False".
    u_opt2 : ndarray
        The second transformation ndarray; only returned if "single_transform=False".
    u_opt : ndarray
        The transformation ndarray; only returned if "single_transform=True".
    e_opt : float
        The single- or double- sided orthogonal Procrustes error.

    Raises
    ------
    ValueError
        When `single_transform=True` and array :math:`A` or :math:`B` is not symmetric after
        the zero rows/columns were removed and the padding, translation and scaling were
        applied.
    ValueError
        If the mode is not 'exact' or 'approx' when `single_transform=True`.
    numpy.linalg.LinAlgError
        May propagate from the underlying matrix decompositions.

    Notes
    -----
    **Two-Sided Orthogonal Procrustes:**

    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference
    :math:`\mathbf{B}_{m \times n}`, find two unitary/orthogonal transformations of
    :math:`\mathbf{A}_{m \times n}` that make it as close as possible to
    :math:`\mathbf{B}_{m \times n}`. I.e.,

    .. math::
       \underbrace{\text{min}}_{\left\{ {\mathbf{U}_1 \atop \mathbf{U}_2} \left|
         {\mathbf{U}_1^{-1} = \mathbf{U}_1^\dagger \atop \mathbf{U}_2^{-1} =
         \mathbf{U}_2^\dagger} \right. \right\}}
         \|\mathbf{U}_1^\dagger \mathbf{A} \mathbf{U}_2 - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{ {\mathbf{U}_1 \atop \mathbf{U}_2} \left|
         {\mathbf{U}_1^{-1} = \mathbf{U}_1^\dagger \atop \mathbf{U}_2^{-1} =
         \mathbf{U}_2^\dagger} \right. \right\}}
         \text{Tr}\left[\left(\mathbf{U}_1^\dagger\mathbf{A}\mathbf{U}_2 - \mathbf{B} \right)^\dagger
                        \left(\mathbf{U}_1^\dagger\mathbf{A}\mathbf{U}_2 - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{ {\mathbf{U}_1 \atop \mathbf{U}_2} \left|
         {\mathbf{U}_1^{-1} = \mathbf{U}_1^\dagger \atop \mathbf{U}_2^{-1} =
         \mathbf{U}_2^\dagger} \right. \right\}}
         \text{Tr}\left[\mathbf{U}_2^\dagger\mathbf{A}^\dagger\mathbf{U}_1\mathbf{B} \right]

    We can get the solution by taking singular value decomposition of the matrices. Having,

    .. math::
       \mathbf{A} = \mathbf{U}_A \mathbf{\Sigma}_A \mathbf{V}_A^\dagger \\
       \mathbf{B} = \mathbf{U}_B \mathbf{\Sigma}_B \mathbf{V}_B^\dagger

    The transformation is found by,

    .. math::
       \mathbf{U}_1 = \mathbf{U}_A \mathbf{U}_B^\dagger \\
       \mathbf{U}_2 = \mathbf{V}_A \mathbf{V}_B^\dagger

    **Two-Sided Orthogonal Procrustes with Single-Transformation:**

    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference
    :math:`\mathbf{B}_{n \times n}`, find one unitary/orthogonal transformation matrix
    :math:`\mathbf{U}_{n \times n}` that makes :math:`\mathbf{A}_{n \times n}` as close as
    possible to :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \|\mathbf{U}^\dagger\mathbf{A}\mathbf{U} - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \text{Tr}\left[\left(\mathbf{U}^\dagger\mathbf{A}\mathbf{U} - \mathbf{B} \right)^\dagger
                         \left(\mathbf{U}^\dagger\mathbf{A}\mathbf{U} - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
          \text{Tr}\left[\mathbf{U}^\dagger\mathbf{A}^\dagger\mathbf{U}\mathbf{B} \right]

    Taking the eigenvalue decomposition of the matrices:

    .. math::
       \mathbf{A} = \mathbf{U}_A \mathbf{\Lambda}_A \mathbf{U}_A^\dagger \\
       \mathbf{B} = \mathbf{U}_B \mathbf{\Lambda}_B \mathbf{U}_B^\dagger

    the solution is obtained by,

    .. math::
       \mathbf{U} = \mathbf{U}_A \mathbf{S} \mathbf{U}_B^\dagger

    where :math:`\mathbf{S}` is a diagonal matrix for which every diagonal element is
    :math:`\pm{1}`,

    .. math::
       \mathbf{S} =
       \begin{bmatrix}
        { \pm 1} & 0       &\cdots &0 \\
        0        &{ \pm 1} &\ddots &\vdots \\
        \vdots   &\ddots   &\ddots &0\\
        0        &\cdots   &0      &{ \pm 1}
       \end{bmatrix}

    The matrix :math:`\mathbf{S}` is chosen to be the identity matrix.

    Please note that the translation operation is not well defined for two sided orthogonal
    procrustes since two sided rotation and translation don't commute. Therefore, please be
    careful when setting translate=True.

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[30, 33, 20], [33, 53, 43], [20, 43, 46]])
    >>> array_b = np.array([
    ...     [ 22.78131838, -0.58896768,-43.00635291, 0., 0.],
    ...     [ -0.58896768, 16.77132475,  0.24289990, 0., 0.],
    ...     [-43.00635291,  0.2428999 , 89.44735687, 0., 0.],
    ...     [  0.        ,  0.        ,  0.        , 0., 0.]])
    >>> new_a, new_b, array_u, error_opt = orthogonal_2sided(
    ...     array_a, array_b, single_transform=True,
    ...     remove_zero_col=True, remove_zero_row=True, mode='exact')
    >>> array_u
    array([[ 0.25116633,  0.76371527,  0.59468855],
           [-0.95144277,  0.08183302,  0.29674906],
           [ 0.17796663, -0.64034549,  0.74718507]])
    >>> error_opt
    1.9646186414076689e-26

    """
    if translate:
        # Adjacent literals are joined at compile time, so no source indentation
        # leaks into the emitted message.
        warnings.warn("The translation matrix was not well defined. "
                      "Two sided rotation and translation don't commute.",
                      stacklevel=2)

    # Check inputs
    array_a, array_b = setup_input_arrays(array_a, array_b, remove_zero_col,
                                          remove_zero_row, pad_mode, translate,
                                          scale, check_finite)

    # Convert mode strings into lowercase
    mode = mode.lower()

    # Do regular two-sided orthogonal Procrustes calculations
    if not single_transform:
        u_opt1, u_opt2 = _2sided(array_a, array_b)
        e_opt = error(array_a, array_b, u_opt1, u_opt2)
        return array_a, array_b, u_opt1, u_opt2, e_opt

    # The single-transformation problem is only defined for symmetric arrays.
    # Validate the processed arrays (the ones the solver receives): the raw inputs
    # may still carry zero padding that makes them non-square, in which case
    # comparing an array with its transpose fails on shape alone.
    for label, array in (("array_a", array_a), ("array_b", array_b)):
        is_square = array.ndim == 2 and array.shape[0] == array.shape[1]
        if not is_square or not np.allclose(array.T, array):
            raise ValueError("{} should be symmetric.".format(label))

    # Do single-transformation computation
    if mode == "approx":
        u_opt = _2sided_1trans_approx(array_a, array_b, tol)
    elif mode == "exact":
        u_opt = _2sided_1trans_exact(array_a, array_b)
    else:
        raise ValueError("Invalid mode argument (use 'exact' or 'approx')")

    # the same transformation is applied on both sides, hence it is passed twice
    e_opt = error(array_a, array_b, u_opt, u_opt)
    return array_a, array_b, u_opt, e_opt
def softassign(array_a, array_b, iteration_soft=50, iteration_sink=200,
               beta_r=1.10, beta_f=1.e5, epsilon=0.05, epsilon_soft=1.e-3,
               epsilon_sink=1.e-3, k=0.15, gamma_scaler=1.01, n_stop=3,
               pad_mode='row-col', remove_zero_col=True, remove_zero_row=True,
               translate=False, scale=False, check_finite=True, adapted=True,
               beta_0=None, m_guess=None, iteration_anneal=None):
    r"""
    Find the transformation matrix for 2-sided permutation Procrustes with softassign algorithm.

    Parameters
    ----------
    array_a : numpy.ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : numpy.ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    iteration_soft : int, optional
        Maximum number of iterations of the softassign loop. Default=50.
    iteration_sink : int, optional
        Maximum number of iterations of the Sinkhorn loop. Default=200.
    beta_r : float, optional
        Annealing rate, which must be strictly greater than 1. Default=1.10.
    beta_f : float, optional
        The final inverse temperature. It is multiplied by the number of rows
        internally and is ignored when `iteration_anneal` is given. Default=1.e5.
    epsilon : float, optional
        The tolerance value for the annealing loop. Default=0.05.
    epsilon_soft : float, optional
        The tolerance value used for softassign. Default=1.e-3.
    epsilon_sink : float, optional
        The tolerance value used for the Sinkhorn loop. If the adapted version is used,
        the adapted tolerance value is used for Sinkhorn instead. Default=1.e-3.
    k : float, optional
        This parameter controls how much tighter the convergence threshold for the
        interior loop should be than the convergence threshold for the loops outside.
        It has to be within the interval :math:`(0, 1)`. Default=0.15.
    gamma_scaler : float, optional
        This parameter ensures that the quadratic cost function including the
        self-amplification term is positive definite. Default=1.01.
    n_stop : int, optional
        Number of further annealing steps that are run after the relaxation
        procedure stops changing the solution. Default=3.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    remove_zero_col : bool, optional
        If True, zero columns (values less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
        Default=True.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, ie columns of the arrays
        will have mean zero. Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`. Default=False.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs. Default=True.
    adapted : bool, optional
        If True, the tighter (adapted) convergence thresholds are used for the
        interior loops. Default=True.
    beta_0 : float, optional
        Initial inverse temperature. If given, it is multiplied by the number of rows.
        If None, it is estimated from the largest absolute eigenvalue of the
        self-amplified benefit matrix. Default=None.
    m_guess : numpy.ndarray, optional
        The initial guess of the doubly-stochastic matrix. It must not contain negative
        values; if its shape does not match the problem size a random guess is used
        instead (with a warning). Default=None.
    iteration_anneal : int, optional
        Number of iterations for the annealing loop. If given, the final inverse
        temperature is derived from it and `beta_f` is ignored. Default=None.

    Returns
    -------
    new_a : numpy.ndarray
        The transformed numpy.ndarray A.
    new_b : numpy.ndarray
        The transformed numpy.ndarray B.
    m_ai : numpy.ndarray
        The optimum permutation transformation matrix.
    e_opt : float
        Two-sided Procrustes error.

    Raises
    ------
    ValueError
        If `beta_r` is not greater than 1, if both `iteration_anneal` and `beta_f` are
        None, or if `m_guess` contains negative values.

    Notes
    -----
    Quadratic assignment problem (QAP) has played a very special but fundamental role in
    combinatorial optimization problems. The problem can be defined as an optimization problem
    to minimize the cost to assign a set of facilities to a set of locations. The cost is a
    function of the flow between the facilities and the geographical distances among various
    facilities.

    The objective function (also named loss function in machine learning) is defined as [1]_

    .. math::
        E_{qap}(M, \mu, \nu) =
            - \frac{1}{2}\Sigma_{aibj}C_{ai;bj}M_{ai}M_{bj}
            + \Sigma_{a}{\mu}_a (\Sigma_i M_{ai} -1) \\
            + \Sigma_i {\nu}_i (\Sigma_a M_{ai} -1)
            - \frac{\gamma}{2}\Sigma_{ai} {M_{ai}}^2
            + \frac{1}{\beta} \Sigma_{ai} M_{ai}\log{M_{ai}}

    where :math:`C_{ai,bj}` is the benefit matrix, :math:`M` is the desired
    :math:`N \times N` permutation matrix. :math:`E` is the energy function which comes along
    with a self-amplification term with :math:`\gamma`, two Lagrange parameters :math:`\mu` and
    :math:`\nu` for constrained optimization and :math:`M_{ai} \log{M_{ai}}` serves as a
    barrier function which ensures positivity of :math:`M_{ai}`. The inverse temperature
    :math:`\beta` is a deterministic annealing control parameter. More detailed information
    about the algorithm can be found in Rangarajan's paper and in [2]_.

    References
    ----------
    .. [1] Rangarajan, Anand and Yuille, Alan L and Gold, Steven and Mjolsness, Eric,
        "A convergence proof for the softassign quadratic assignment algorithm",
        Advances in Neural Information Processing Systems, page 620-626, 1997.
    .. [2] Stefan Roth, "Analysis of a Deterministic Annealing Method for Graph Matching
        and Quadratic Assignment", Ph.D. thesis, University of Mannheim, 2001

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[4, 5, 3, 3], [5, 7, 3, 5],
    ...                     [3, 3, 2, 2], [3, 5, 2, 5]])
    >>> # define array_b by permuting array_a
    >>> perm = np.array([[0., 0., 1., 0.], [1., 0., 0., 0.],
    ...                  [0., 0., 0., 1.], [0., 1., 0., 0.]])
    >>> array_b = np.dot(perm.T, np.dot(array_a, perm))
    >>> new_a, new_b, M_ai, e_opt = softassign(array_a, array_b,
    ...                                        remove_zero_col=False,
    ...                                        remove_zero_row=False)
    >>> M_ai # the permutation matrix
    array([[0., 0., 1., 0.],
           [1., 0., 0., 0.],
           [0., 0., 0., 1.],
           [0., 1., 0., 0.]])
    >>> e_opt # the error
    0.0

    """
    # pylint: disable-msg=too-many-arguments
    # pylint: disable-msg=too-many-branches
    # todo: add linear_cost_func with default value 0
    # The annealing schedule beta <- beta * beta_r only grows when beta_r > 1.
    if beta_r <= 1:
        raise ValueError("Argument beta_r must be greater than 1.")

    array_a, array_b = setup_input_arrays(array_a, array_b, remove_zero_col, remove_zero_row,
                                          pad_mode, translate, scale, check_finite)

    # Benefit matrix and its 4-index view C[a, i, b, j] used in the relaxation step.
    array_c = np.kron(array_a, array_b)
    # Number of rows of A (B and the permutation matrix as well)
    row_num = array_a.shape[0]
    c_tensor = array_c.reshape(row_num, row_num, row_num, row_num)

    # Self-amplification strength and initial inverse temperature
    gamma = _compute_gamma(array_c, row_num, gamma_scaler)
    if beta_0 is None:
        c_gamma = array_c + gamma * np.identity(row_num * row_num)
        eival_gamma = np.amax(np.abs(np.linalg.eigvalsh(c_gamma)))
        # The floor of 1.e-10 guards against division by a vanishing eigenvalue.
        beta_0 = 1 / (gamma_scaler * max(1.e-10, eival_gamma / row_num))
    else:
        beta_0 = beta_0 * row_num
    beta = beta_0

    # iteration_anneal takes precedence over beta_f when both are provided.
    if iteration_anneal is not None:
        beta_f = beta_0 * np.power(beta_r, iteration_anneal) * row_num
    elif beta_f is not None:
        beta_f = beta_f * row_num
    else:
        raise ValueError("We must specify at least one of iteration_anneal and beta_f and "
                         "specify only one is recommended.")

    # Initialization of the doubly-stochastic matrix
    array_m = None
    if m_guess is not None:
        if np.any(m_guess < 0):
            raise ValueError(
                "The initial guess of permutation matrix cannot contain any negative values.")
        if m_guess.shape == (row_num, row_num):
            array_m = m_guess
        else:
            warnings.warn("The shape of m_guess does not match ({0}, {0}). "
                          "Use random initial guess instead.".format(row_num))
    if array_m is None:
        # np.abs keeps the random start non-negative.
        array_m = np.abs(np.random.normal(loc=1.0, scale=0.1, size=(row_num, row_num)))
    # Division allocates a new array, so a user-supplied m_guess is never modified.
    array_m = array_m / row_num

    nochange = 0
    if adapted:
        epsilon_sink = epsilon_soft * k

    while beta < beta_f:
        # Snapshot used to measure the change over one annealing step
        m_old_beta = array_m.copy()
        # Defined up-front so the adaptive update below is valid even if the
        # softassign loop runs zero times (iteration_soft == 0).
        change_soft = 0.0

        # softassign loop
        for _ in range(iteration_soft):
            m_old_soft = array_m.copy()
            # Relaxation step: Z_ai = sum_bj C_aibj M_bj + gamma * M_ai
            array_z = np.einsum('aibj,bj->ai', c_tensor, array_m)
            array_z += gamma * array_m
            # soft_assign
            array_m = np.exp(beta * array_z)

            # Sinkhorn loop: alternate row and column normalization
            for _ in range(iteration_sink):
                array_m = array_m / array_m.sum(axis=1, keepdims=True)
                array_m = array_m / array_m.sum(axis=0, keepdims=True)
                # Stop once the row sums are close enough to one
                if np.amax(np.abs(array_m.sum(axis=1, keepdims=True) - 1)) < epsilon_sink:
                    array_m = array_m / array_m.sum(axis=1, keepdims=True)
                    break

            change_soft = np.amax(np.abs(array_m - m_old_soft))
            if change_soft < epsilon_soft:
                break
            if adapted:
                epsilon_sink = change_soft * k

        change_annealing = np.amax(np.abs(array_m - m_old_beta))
        if change_annealing < epsilon:
            nochange += 1
            if nochange > n_stop:
                break
        else:
            nochange = 0

        beta *= beta_r
        if adapted:
            epsilon_soft = change_soft * k
            epsilon_sink = epsilon_soft * k

    # Map the relaxed solution to a permutation matrix, then compute the error
    _, _, array_m, _ = permutation(np.eye(array_m.shape[0]), array_m)
    e_opt = error(array_a, array_b, array_m, array_m)
    return array_a, array_b, array_m, e_opt
def _2sided_regular(M, N, tol, iteration):
    r"""
    Search for two transformations by alternating updates from two starting points.

    According to the original in-code note, the quantity of interest is
    :math:`\| M - P N Q \|_F^2`, and the scheme is taken from page 64 of
    "Parallel solution of SVD-related problems, with applications",
    Pythagoras Papadimitriou, University of Manchester, 1993.

    Two alternating searches are run: the first starts with the left matrix fixed to
    the identity, the second with the right matrix fixed to the identity. In each
    search one matrix is updated with ``_2sided_Hungarian`` while the other is held
    fixed, and the error is re-evaluated with ``error(N, M, P.T, Q)`` after every
    update. The result with the smaller error is returned.

    NOTE(review): ``_2sided_Hungarian`` and ``error`` are defined elsewhere in this
    file; presumably the former solves a linear assignment problem and returns a
    permutation matrix -- confirm against its definition.

    Parameters
    ----------
    M : numpy.ndarray
        The 2d-array passed as the second argument of ``error``.
    N : numpy.ndarray
        The 2d-array passed as the first argument of ``error``.
    tol : float
        A search stops as soon as its error is less than or equal to this value.
    iteration : int
        Maximum number of alternating sweeps performed by each search.

    Returns
    -------
    P : numpy.ndarray
        Left matrix of the better search.
    Q : numpy.ndarray
        Right matrix of the better search.
    e_opt : float
        Error of the returned pair, as computed by ``error(N, M, P.T, Q)``.

    """
    def _update_left(right):
        # New left matrix for a fixed right matrix
        return np.transpose(_2sided_Hungarian(np.dot(np.dot(N, right), M.T)))

    def _update_right(left):
        # New right matrix for a fixed left matrix
        return _2sided_Hungarian(np.dot(np.dot(N.T, left.T), M))

    # ---- First search: fix P = I, take the initial Q from the assignment step ----
    perm_p1 = np.eye(M.shape[0], M.shape[0])
    perm_q1 = _2sided_Hungarian(np.dot(N.T, M))
    e_opt1 = error(N, M, perm_p1.T, perm_q1)
    step1 = 0
    while e_opt1 > tol and step1 < iteration:
        step1 += 1
        perm_p1 = _update_left(perm_q1)
        e_opt1 = error(N, M, perm_p1.T, perm_q1)
        if e_opt1 <= tol:
            break
        perm_q1 = _update_right(perm_p1)
        e_opt1 = error(N, M, perm_p1.T, perm_q1)
    # Only report when the budget was exhausted without reaching the tolerance.
    if step1 == iteration and e_opt1 > tol:
        print('Maximum iteration reached in the first case! '
              'Error={0}'.format(e_opt1))

    # ---- Second search: fix Q = I, take the initial P from the assignment step ----
    perm_q2 = np.eye(M.shape[1], M.shape[1])
    perm_p2 = np.transpose(_2sided_Hungarian(np.dot(N, M.T)))
    e_opt2 = error(N, M, perm_p2.T, perm_q2)
    step2 = 0
    while e_opt2 > tol and step2 < iteration:
        perm_q2 = _update_right(perm_p2)
        # The error must be evaluated with this search's own Q (perm_q2), not
        # with the right matrix left over from the first search.
        e_opt2 = error(N, M, perm_p2.T, perm_q2)
        if e_opt2 <= tol:
            break
        perm_p2 = _update_left(perm_q2)
        e_opt2 = error(N, M, perm_p2.T, perm_q2)
        step2 += 1
    if step2 == iteration and e_opt2 > tol:
        print('Maximum iteration reached in the second case! '
              'Error={0}'.format(e_opt2))

    # Keep the better of the two searches; ties go to the first one.
    if e_opt1 <= e_opt2:
        return perm_p1, perm_q1, e_opt1
    return perm_p2, perm_q2, e_opt2
def orthogonal(array_a, array_b, remove_zero_col=True, remove_zero_row=True,
               pad_mode='row-col', translate=False, scale=False, check_finite=True):
    r"""
    Solve the one-sided orthogonal Procrustes problem.

    Both inputs are first passed through ``setup_input_arrays`` (padding, optional
    removal of zero rows/columns, optional translation and scaling). The optimal
    orthogonal transformation is then obtained from a singular value decomposition.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference array.
    remove_zero_col : bool, optional
        If True, zero columns will be removed. Default=True.
    remove_zero_row : bool, optional
        If True, zero rows will be removed. Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, ie columns of the arrays
        will have mean zero. Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, ie
        :math:`Tr(A^T A) = 1`. Default=False.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs. Default=True.

    Returns
    -------
    new_a : ndarray
        The transformed ndarray :math:`A`.
    new_b : ndarray
        The transformed ndarray :math:`B`.
    u_opt : ndarray
        The optimum transformation matrix.
    e_opt : float
        One-sided orthogonal Procrustes error.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference
    :math:`\mathbf{B}_{m \times n}`, find the unitary/orthogonal transformation matrix
    :math:`\mathbf{U}_{n \times n}` that makes :math:`\mathbf{A}_{m \times n}` as close as
    possible to :math:`\mathbf{B}_{m \times n}`. I.e.,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger \right\}}
                          \|\mathbf{A}\mathbf{U} - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                                   \right\}}
          \text{Tr}\left[\left(\mathbf{A}\mathbf{U} - \mathbf{B} \right)^\dagger
                         \left(\mathbf{A}\mathbf{U} - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{\mathbf{U} | \mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                                   \right\}}
          \text{Tr}\left[\mathbf{U}^\dagger {\mathbf{A}}^\dagger \mathbf{B} \right]

    The solution is obtained by taking the singular value decomposition (SVD) of the product of
    the matrices,

    .. math::
       \mathbf{A}^\dagger \mathbf{B} &= \tilde{\mathbf{U}} \tilde{\mathbf{\Sigma}}
                                          \tilde{\mathbf{V}}^{\dagger} \\
       \mathbf{U}_{\text{optimum}} &= \tilde{\mathbf{U}} \tilde{\mathbf{V}}^{\dagger}

    The singular values are always listed in decreasing order, with the smallest singular
    value in the bottom-right-hand corner of :math:`\tilde{\mathbf{\Sigma}}`.

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[-7.3,  2.8],
    ...                     [-7.1, -0.2],
    ...                     [ 4. ,  1.4],
    ...                     [ 1.3,  0. ]])
    >>> array_b = np.array([[-5.90207845, -5.12791088],
    ...                     [-6.74021234, -2.24043246],
    ...                     [ 4.23759847,  0.05252849],
    ...                     [ 1.22159856,  0.44463126]])
    >>> new_a, new_b, array_u, error_opt = orthogonal(array_a, array_b)
    >>> new_a
    array([[-7.3,  2.8],
           [-7.1, -0.2],
           [ 4. ,  1.4],
           [ 1.3,  0. ]])
    >>> new_b
    array([[-5.90207845, -5.12791088],
           [-6.74021234, -2.24043246],
           [ 4.23759847,  0.05252849],
           [ 1.22159856,  0.44463126]])
    >>> array_u # the optimum orthogonal transformation array
    array([[ 0.9396912 ,  0.34202404],
           [ 0.34202404, -0.9396912 ]])
    >>> error_opt #error
    1.435973366535123e-29

    """
    # Validate / pad / centre / scale the inputs as requested.
    prepared = setup_input_arrays(array_a, array_b, remove_zero_col, remove_zero_row,
                                  pad_mode, translate, scale, check_finite)
    new_a, new_b = prepared

    # SVD of A^T B; the singular values themselves are not needed.
    overlap = np.dot(new_a.T, new_b)
    left_vecs, _, right_vecs_t = np.linalg.svd(overlap)

    # U_opt = U~ V~^T, followed by the residual error of the fit.
    array_u_opt = np.dot(left_vecs, right_vecs_t)
    return new_a, new_b, array_u_opt, error(new_a, new_b, array_u_opt)