def check_pairwise_arrays(X, Y, precomputed=False, dtype=None): X, Y, dtype_float = pairwise._return_float_dtype(X, Y) warn_on_dtype = dtype is not None estimator = 'check_pairwise_arrays' if dtype is None: dtype = dtype_float if Y is X or Y is None: X = Y = validation.check_array(X, accept_sparse='csr', dtype=dtype, warn_on_dtype=warn_on_dtype, estimator=estimator) else: X = validation.check_array(X, accept_sparse='csr', dtype=dtype, warn_on_dtype=warn_on_dtype, estimator=estimator) Y = validation.check_array(Y, accept_sparse='csr', dtype=dtype, warn_on_dtype=warn_on_dtype, estimator=estimator) if precomputed: if X.shape[1] != Y.shape[0]: raise ValueError("Precomputed metric requires shape " "(n_queries, n_indexed). Got (%d, %d) " "for %d indexed." % (X.shape[0], X.shape[1], Y.shape[0])) elif X.shape[1] != Y.shape[1]: raise ValueError("Incompatible dimension for X and Y matrices: " "X.shape[1] == %d while Y.shape[1] == %d" % ( X.shape[1], Y.shape[1])) return X, Y
def check_pairwise_arrays(X, Y, precomputed=False, dtype=None, accept_sparse='csr', force_all_finite=True, copy=False): """ Set X and Y appropriately and checks inputs If Y is None, it is set as a pointer to X (i.e. not a copy). If Y is given, this does not happen. All distance metrics should use this function first to assert that the given parameters are correct and safe to use. Specifically, this function first ensures that both X and Y are arrays, then checks that they are at least two dimensional while ensuring that their elements are floats (or dtype if provided). Finally, the function checks that the size of the second dimension of the two arrays is equal, or the equivalent check for a precomputed distance matrix. Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples_a, n_features) Y : {array-like, sparse matrix}, shape (n_samples_b, n_features) precomputed : bool True if X is to be treated as precomputed distances to the samples in Y. dtype : string, type, list of types or None (default=None) Data type required for X and Y. If None, the dtype will be an appropriate float type selected by _return_float_dtype. .. versionadded:: 0.18 accept_sparse : string, boolean or list/tuple of strings String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error. force_all_finite : bool Whether to raise an error on np.inf and np.nan in X (or Y if it exists) copy : bool Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. Returns ------- safe_X : {array-like, sparse matrix}, shape (n_samples_a, n_features) An array equal to X, guaranteed to be a numpy array. safe_Y : {array-like, sparse matrix}, shape (n_samples_b, n_features) An array equal to Y if Y was not None, guaranteed to be a numpy array. If Y was None, safe_Y will be a pointer to X. """ X, Y, dtype_float = _return_float_dtype(X, Y) warn_on_dtype = dtype is not None estimator = 'check_pairwise_arrays' if dtype is None: dtype = dtype_float if Y is X or Y is None: X = Y = check_array(X, accept_sparse=accept_sparse, dtype=dtype, copy=copy, force_all_finite=force_all_finite, warn_on_dtype=warn_on_dtype, estimator=estimator) else: X = check_array(X, accept_sparse=accept_sparse, dtype=dtype, copy=copy, force_all_finite=force_all_finite, warn_on_dtype=warn_on_dtype, estimator=estimator) Y = check_array(Y, accept_sparse=accept_sparse, dtype=dtype, copy=copy, force_all_finite=force_all_finite, warn_on_dtype=warn_on_dtype, estimator=estimator) if precomputed: if X.shape[1] != Y.shape[0]: raise ValueError("Precomputed metric requires shape " "(n_queries, n_indexed). Got (%d, %d) " "for %d indexed." % (X.shape[0], X.shape[1], Y.shape[0])) elif X.shape[1] != Y.shape[1]: raise ValueError("Incompatible dimension for X and Y matrices: " "X.shape[1] == %d while Y.shape[1] == %d" % ( X.shape[1], Y.shape[1])) return X, Y