def pairwise_distance_differences(high_distances=None,
                                  low_distances=None,
                                  high_data=None,
                                  low_data=None,
                                  metric='euclidean'):
    '''
    Return the element-wise gap between high- and low-space distances.

    Computes $d_{ij}-||x_{i}-x_{j}||$, i.e. ``high_distances -
    low_distances``. For each space, a distance matrix that was not passed
    in is computed from the corresponding data with
    ``sk_pairwise_distances(data, metric=metric)``. When both the distances
    and the data of a space are supplied, the supplied distances are used
    and the data is ignored.

    Parameters
    ----------
    high_distances: pairwise distances in the high space, or None to have
        them computed from high_data
    low_distances: pairwise distances in the low space, or None to have
        them computed from low_data
    high_data: points in the high space; only used if high_distances is None
    low_data: points in the low space; only used if low_distances is None
    metric: forwarded as ``metric=`` to ``sk_pairwise_distances``; unused
        when both distance matrices are supplied

    Returns
    -------
    (high_distances, low_distances, distance_difference)

    high_distances: np array of pairwise distances between high_data points
    low_distances: np array of pairwise distances between low_data points
    distance_difference: np array high_distances - low_distances

    Raises
    ------
    ValueError: if neither the distances nor the data of a space is given

    >>> a = np.array([[7, 4, 0], [4, 5, 2], [9, 4, 3]])
    >>> b = np.array([[0, 6], [7, 1], [4, 9]])
    >>> pairwise_distance_differences(high_data=a, low_data=b)[0]
    array([[0.        , 3.74165739, 3.60555128],
           [3.74165739, 0.        , 5.19615242],
           [3.60555128, 5.19615242, 0.        ]])
    >>> pairwise_distance_differences(high_data=a, low_data=b)[1]
    array([[0.        , 8.60232527, 5.        ],
           [8.60232527, 0.        , 8.54400375],
           [5.        , 8.54400375, 0.        ]])
    >>> pairwise_distance_differences(high_data=a, low_data=b)[2]
    array([[ 0.        , -4.86066788, -1.39444872],
           [-4.86066788,  0.        , -3.34785132],
           [-1.39444872, -3.34785132,  0.        ]])

    >>> a = np.array([[0, 4, 7], [4, 0, 2], [7, 2, 0]])
    >>> b = np.array([[0, 4, 8], [4, 0, 1], [8, 1, 0]])
    >>> pairwise_distance_differences(high_distances=a, low_distances=b)[0]
    array([[0, 4, 7],
           [4, 0, 2],
           [7, 2, 0]])
    >>> pairwise_distance_differences(high_distances=a, low_distances=b)[1]
    array([[0, 4, 8],
           [4, 0, 1],
           [8, 1, 0]])
    >>> pairwise_distance_differences(high_distances=a, low_distances=b)[2]
    array([[ 0,  0, -1],
           [ 0,  0,  1],
           [-1,  1,  0]])
    '''
    need_high = high_distances is None
    need_low = low_distances is None

    # Validate both spaces up front so nothing is computed on bad input.
    if need_high and high_data is None:
        raise ValueError("One of high_distances or high_data is required")
    if need_low and low_data is None:
        raise ValueError("One of low_distances or low_data is required")

    # Fill in whichever matrices the caller left out.
    if need_low:
        low_distances = sk_pairwise_distances(low_data, metric=metric)
    if need_high:
        high_distances = sk_pairwise_distances(high_data, metric=metric)

    return high_distances, low_distances, high_distances - low_distances
Esempio n. 2
0
    def testPairwiseDistancesExecution(self):
        """Compare the tensor ``pairwise_distances`` with the reference one.

        Each scenario builds a distance tensor from chunked inputs, runs it
        through ``self.executor`` and compares the outcome with what
        ``sk_pairwise_distances`` returns for the same raw arrays. Invalid
        precomputed input and an unknown metric name must raise
        ``ValueError``; the ``'jaccard'`` metric must emit a
        ``DataConversionWarning``.
        """
        def run(tensor):
            # Same executor call for every scenario; keep its first element.
            return self.executor.execute_tensor(tensor, concat=True)[0]

        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        # Chunk sizes that do not divide the row counts evenly.
        x = mt.tensor(raw_x, chunk_size=11)
        y = mt.tensor(raw_y, chunk_size=12)

        # default metric, two inputs
        dist = pairwise_distances(x, y)
        np.testing.assert_almost_equal(
            run(dist), sk_pairwise_distances(raw_x, raw_y))

        # precomputed: a distance tensor with an entry overwritten by -1
        # must be rejected once it is executed
        tampered = dist.copy()
        tampered[0, 0] = -1
        tampered = pairwise_distances(tampered, y, metric='precomputed')
        with self.assertRaises(ValueError):
            run(tampered)

        # cdist: metric taking extra keyword arguments
        # NOTE(review): 'wminkowski' is not available in recent SciPy
        # releases -- confirm the version this suite is pinned to.
        weight = np.random.rand(5)
        weighted = dict(metric='wminkowski', p=3, w=weight)
        dist = pairwise_distances(x, y, **weighted)
        np.testing.assert_almost_equal(
            run(dist), sk_pairwise_distances(raw_x, raw_y, **weighted))

        # pdist: single input
        dist = pairwise_distances(x, metric='hamming')
        np.testing.assert_almost_equal(
            run(dist), sk_pairwise_distances(raw_x, metric='hamming'))

        # callable metric
        euclid = lambda u, v: np.sqrt(((u - v)**2).sum())
        dist = pairwise_distances(x, y, metric=euclid)
        np.testing.assert_almost_equal(
            run(dist), sk_pairwise_distances(raw_x, raw_y, metric=euclid))

        # 'jaccard' on these float inputs is expected to warn
        assert_warns(DataConversionWarning, pairwise_distances, x, y,
                     metric='jaccard')

        # an unrecognised metric name fails when the tensor is built
        with self.assertRaises(ValueError):
            pairwise_distances(x, y, metric='unknown')