def pairwise_distance_differences(high_distances=None, low_distances=None,
                                  high_data=None, low_data=None,
                                  metric='euclidean'):
    """Compute $d_{ij}-||x_{i}-x_{j}||$ for every pair of points.

    For each space (high and low) the caller supplies either the pairwise
    distances themselves or the data points.  Distances that were not
    supplied are computed from the corresponding data with
    ``sk_pairwise_distances``; distances that were supplied are used and
    returned unchanged.

    Parameters
    ----------
    high_distances : pairwise distances in the high space, or None
    low_distances : pairwise distances in the low space, or None
    high_data : points in the high space; only used when ``high_distances``
        is None
    low_data : points in the low space; only used when ``low_distances``
        is None
    metric : metric forwarded to ``sk_pairwise_distances`` whenever
        distances have to be computed from data

    Returns
    -------
    (high_distances, low_distances, distance_difference)
        high_distances: np array of pairwise distances between high_data
            points
        low_distances: np array of pairwise distances between low_data
            points
        distance_difference: np array high_distances - low_distances

    Raises
    ------
    ValueError
        If neither distances nor data are given for the high space, or
        for the low space.

    Examples
    --------
    >>> a = np.array([[7, 4, 0], [4, 5, 2], [9, 4, 3]])
    >>> b = np.array([[0, 6], [7, 1], [4, 9]])
    >>> pairwise_distance_differences(high_data=a, low_data=b)[0]
    array([[0.        , 3.74165739, 3.60555128],
           [3.74165739, 0.        , 5.19615242],
           [3.60555128, 5.19615242, 0.        ]])
    >>> pairwise_distance_differences(high_data=a, low_data=b)[1]
    array([[0.        , 8.60232527, 5.        ],
           [8.60232527, 0.        , 8.54400375],
           [5.        , 8.54400375, 0.        ]])
    >>> pairwise_distance_differences(high_data=a, low_data=b)[2]
    array([[ 0.        , -4.86066788, -1.39444872],
           [-4.86066788,  0.        , -3.34785132],
           [-1.39444872, -3.34785132,  0.        ]])

    >>> a = np.array([[0, 4, 7], [4, 0, 2], [7, 2, 0]])
    >>> b = np.array([[0, 4, 8], [4, 0, 1], [8, 1, 0]])
    >>> pairwise_distance_differences(high_distances=a, low_distances=b)[0]
    array([[0, 4, 7],
           [4, 0, 2],
           [7, 2, 0]])
    >>> pairwise_distance_differences(high_distances=a, low_distances=b)[1]
    array([[0, 4, 8],
           [4, 0, 1],
           [8, 1, 0]])
    >>> pairwise_distance_differences(high_distances=a, low_distances=b)[2]
    array([[ 0,  0, -1],
           [ 0,  0,  1],
           [-1,  1,  0]])
    """
    need_high = high_distances is None
    need_low = low_distances is None

    # Validate both spaces up front (high first) before doing any work.
    if need_high and high_data is None:
        raise ValueError("One of high_distances or high_data is required")
    if need_low and low_data is None:
        raise ValueError("One of low_distances or low_data is required")

    # Only fall back to the data when the distances were not handed in.
    if need_low:
        low_distances = sk_pairwise_distances(low_data, metric=metric)
    if need_high:
        high_distances = sk_pairwise_distances(high_data, metric=metric)

    return high_distances, low_distances, high_distances - low_distances
def testPairwiseDistancesExecution(self):
    """Check executed ``pairwise_distances`` results against ``sk_pairwise_distances``.

    Covers the default metric on two inputs, a precomputed matrix containing
    a negative entry (expected to raise), a weighted Minkowski metric, a
    single-input metric, a Python callable metric, and the warning / error
    raised when the tensor is constructed with 'jaccard' / an unknown metric.
    """

    def run(tensor):
        # Execute the tensor and return its single concatenated result.
        return self.executor.execute_tensor(tensor, concat=True)[0]

    def euclid(u, v):
        # Plain Euclidean distance, passed as a callable metric below.
        return np.sqrt(((u - v)**2).sum())

    left_raw = np.random.rand(20, 5)
    right_raw = np.random.rand(21, 5)
    # NOTE(review): chunk sizes are below the row counts, presumably so that
    # each input is split across more than one chunk -- confirm.
    left = mt.tensor(left_raw, chunk_size=11)
    right = mt.tensor(right_raw, chunk_size=12)

    # default metric, two inputs
    dist = pairwise_distances(left, right)
    np.testing.assert_almost_equal(
        run(dist), sk_pairwise_distances(left_raw, right_raw))

    # test precomputed: a distance matrix with a -1 entry must fail with
    # ValueError when executed
    tampered = dist.copy()
    tampered[0, 0] = -1
    tampered = pairwise_distances(tampered, right, metric='precomputed')
    with self.assertRaises(ValueError):
        run(tampered)

    # test cdist
    weight = np.random.rand(5)
    weighted = dict(metric='wminkowski', p=3, w=weight)
    dist = pairwise_distances(left, right, **weighted)
    np.testing.assert_almost_equal(
        run(dist), sk_pairwise_distances(left_raw, right_raw, **weighted))

    # test pdist
    dist = pairwise_distances(left, metric='hamming')
    np.testing.assert_almost_equal(
        run(dist), sk_pairwise_distances(left_raw, metric='hamming'))

    # test function metric
    dist = pairwise_distances(left, right, metric=euclid)
    np.testing.assert_almost_equal(
        run(dist), sk_pairwise_distances(left_raw, right_raw, metric=euclid))

    # 'jaccard' on these inputs is expected to emit DataConversionWarning
    # when the tensor is constructed.
    assert_warns(DataConversionWarning, pairwise_distances,
                 left, right, metric='jaccard')

    # An unrecognised metric name is rejected when the tensor is constructed.
    with self.assertRaises(ValueError):
        pairwise_distances(left, right, metric='unknown')