Ejemplo n.º 1
0
    def compute_distances(self, x1, x2):
        """
        Build the helper tables and delegate to the Cython code in `_distance`.

        Each data table is accompanied by two helpers: an int8 table of the
        same shape holding 1 where the element differs from 0 and 0
        elsewhere, and a vector from `_distance.any_nan_row` marking the rows
        (or the columns, when `self.axis` is not 1) that contain nan values.

        The Cython function uses a fast loop without any conditions for rows
        without missing values, and a slower loop for those with missing
        values.

        Args:
            x1: the first data table
            x2: the second data table, or `None` to compare `x1` with itself;
                only used when `self.axis` is 1

        Returns:
            the result of `_distance.jaccard_rows` when `self.axis` is 1,
            otherwise the result of `_distance.jaccard_cols`
        """
        nonzeros1 = np.not_equal(x1, 0).view(np.int8)

        if self.axis != 1:
            # Column mode: nan flags are computed on the transposed table,
            # and `x2` plays no part.
            column_nans = _distance.any_nan_row(x1.T)
            return _distance.jaccard_cols(
                nonzeros1, x1, column_nans, self.ps)

        nans1 = _distance.any_nan_row(x1)
        two_tables = x2 is not None
        if two_tables:
            other = x2
            nonzeros2 = np.not_equal(x2, 0).view(np.int8)
            nans2 = _distance.any_nan_row(x2)
        else:
            # A single table is compared with itself; reuse its helpers.
            other, nonzeros2, nans2 = x1, nonzeros1, nans1

        return _distance.jaccard_rows(
            nonzeros1, nonzeros2,
            x1, other,
            nans1, nans2,
            self.ps,
            two_tables)
Ejemplo n.º 2
0
    def compute_distances(self, x1, x2):
        """
        Prepare auxiliary tables and call the Cython routines in `_distance`.

        For every data table, two auxiliary arrays are passed along: a table
        of 0's and 1's (int8) that flags the elements different from 0, and a
        vector returned by `_distance.any_nan_row` that flags rows (columns,
        if `self.axis` is not 1) containing nan values.

        The Cython function runs a fast, condition-free loop over rows
        without missing values and a slower loop over those with missing
        values.

        Args:
            x1: the first data table
            x2: the second data table or `None`; when `None`, `x1` and its
                auxiliary arrays are used for both operands. Ignored when
                `self.axis` is not 1.

        Returns:
            whatever `_distance.jaccard_rows` (for `self.axis == 1`) or
            `_distance.jaccard_cols` (otherwise) returns
        """
        flags1 = np.not_equal(x1, 0).view(np.int8)
        by_rows = self.axis == 1

        if not by_rows:
            # nan flags per column are obtained from the transposed table
            nan_cols = _distance.any_nan_row(x1.T)
            return _distance.jaccard_cols(flags1, x1, nan_cols, self.ps)

        nan_rows1 = _distance.any_nan_row(x1)
        if x2 is None:
            # one table only: the second operand mirrors the first
            flags2 = flags1
            nan_rows2 = nan_rows1
            second = x1
        else:
            flags2 = np.not_equal(x2, 0).view(np.int8)
            nan_rows2 = _distance.any_nan_row(x2)
            second = x2

        has_second = x2 is not None
        return _distance.jaccard_rows(
            flags1, flags2, x1, second, nan_rows1, nan_rows2,
            self.ps, has_second)
Ejemplo n.º 3
0
    def _compute_dense(self, x1, x2):
        """
        Build helper tables and delegate to the Cython code in `_distance`,
        reporting progress through stepwise callbacks.

        Each data table is accompanied by two helpers: an int8 table of 0's
        and 1's flagging the elements different from 0, and a vector from
        `_distance.any_nan_row` flagging rows (or columns, when `self.axis`
        is not 1) with nan values.

        The Cython function uses a fast loop without any conditions for rows
        without missing values, and a slower loop for those with missing
        values.

        Progress is split with `StepwiseCallbacks(self.callback, weights)`;
        every Cython call receives the next callback(s) in sequence, so the
        number of weights equals the number of `callbacks.next()` calls made
        on each path.

        Args:
            x1: the first data table
            x2: the second data table, or `None` to compare `x1` with itself;
                only used when `self.axis` is 1

        Returns:
            the result of `_distance.jaccard_rows` when `self.axis` is 1,
            otherwise the result of `_distance.jaccard_cols`
        """
        # view is false positive, pylint: disable=no-member
        nonzeros1 = np.not_equal(x1, 0).view(np.int8)

        if self.axis != 1:
            # Column mode: two steps (nan detection, distance computation).
            callbacks = StepwiseCallbacks(self.callback, [10, 90])
            column_nans = _distance.any_nan_row(x1.T, callbacks.next())
            return _distance.jaccard_cols(
                nonzeros1, x1, column_nans, self.ps, callbacks.next())

        # Row mode: one nan-detection step per distinct table, followed by
        # the two callbacks consumed by `jaccard_rows`.
        if x2 is None:
            weights = [5, 45, 50]
        else:
            weights = [5, 5, 45, 45]
        callbacks = StepwiseCallbacks(self.callback, weights)

        nans1 = _distance.any_nan_row(x1, callbacks.next())
        if x2 is not None:
            nonzeros2 = np.not_equal(x2, 0).view(np.int8)
            nans2 = _distance.any_nan_row(x2, callbacks.next())
        else:
            # A single table is compared with itself; reuse its helpers.
            nonzeros2, nans2 = nonzeros1, nans1

        other = x1 if x2 is None else x2
        return _distance.jaccard_rows(
            nonzeros1, nonzeros2, x1, other, nans1, nans2,
            self.ps, x2 is not None,
            callbacks.next(), callbacks.next())