Code example #1
import numpy as np
from scipy.sparse import issparse
# `IO` and `Logging` are hub-toolbox internal helper modules; the exact
# import paths may vary across hub-toolbox versions.
from hub_toolbox import IO, Logging


def score(D:np.ndarray, target:np.ndarray, k=5,
          metric:str='distance', test_set_ind:np.ndarray=None, verbose:int=0):
    """Perform `k`-nearest neighbor classification.
    
    Use the ``n x n`` symmetric distance matrix `D` and target class 
    labels `target` to perform a `k`-NN experiment (leave-one-out 
    cross-validation or evaluation of test set; see parameter `test_set_ind`).
    Ties are broken by the nearest neighbor.
    
    Parameters
    ----------
    D : ndarray
        The ``n x n`` symmetric distance (similarity) matrix.
    
    target : ndarray (of dtype=int)
        The ``n x 1`` target class labels (ground truth).
    
    k : int or array_like (of dtype=int), optional (default: 5)
        Neighborhood size for `k`-NN classification.
        For each value in `k`, one `k`-NN experiment is performed.
        
        HINT: Providing more than one value for `k` is a cheap means to perform 
        multiple `k`-NN experiments at once. Try e.g. ``k=[1, 5, 20]``.
    
    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define whether matrix `D` is a distance or a similarity matrix.

    test_set_ind : ndarray, optional (default: None)
        Define data points to be held out as part of a test set. Can be:
        
        - None : Perform a LOO-CV experiment
        - ndarray : Hold out points indexed in this array as test set. Fit 
          model to remaining data. Evaluate model on test set.
    
    verbose : int, optional (default: 0)
        Increasing level of output (progress report).
    
    Returns
    -------
    acc : ndarray (shape=(n_k x 1), dtype=float)
        Classification accuracy (`n_k`... number of items in parameter `k`)
        
        HINT: Referring to the above example... 
        ... ``acc[0]`` gives the accuracy of the ``k=1`` experiment.
    corr : ndarray (shape=(n_k x n), dtype=int)
        Raw vectors of correctly classified items
        
        HINT: ... ``corr[1, :]`` gives these items for the ``k=5`` experiment.
    cmat : ndarray (shape=(n_k x n_t x n_t), dtype=int) 
        Confusion matrix (``n_t`` number of unique items in parameter target)
        
        HINT: ... ``cmat[2, :, :]`` gives the confusion matrix of 
        the ``k=20`` experiment.
    """
    
    # Check input sanity
    log = Logging.ConsoleLogging()
    IO._check_distance_matrix_shape(D)
    IO._check_distance_matrix_shape_fits_labels(D, target)
    IO._check_valid_metric_parameter(metric)
    if metric == 'distance':
        d_self = np.inf
        sort_order = 1
    elif metric == 'similarity':
        d_self = -np.inf
        sort_order = -1
    
    # Copy, because data is changed
    D = D.copy()
    target = target.astype(int)
    
    if verbose:
        log.message("Start k-NN experiment.")
    # Handle LOO-CV vs. test set mode
    if test_set_ind is None:
        n = D.shape[0]
        test_set_ind = range(n)    # dummy 
        train_set_ind = n   # dummy
    else:
        # number of points to be classified
        n = test_set_ind.size
        # Indices of training examples (all points not in the test set)
        train_set_ind = np.setdiff1d(np.arange(D.shape[0]), test_set_ind)
    # Number of k-NN parameters
    try:
        k_length = k.size
    except AttributeError as e:
        if isinstance(k, int):
            k = np.array([k])
            k_length = k.size
        elif isinstance(k, list):
            k = np.array(k)
            k_length = k.size
        else:
            raise e
        
    acc = np.zeros((k_length, 1))
    corr = np.zeros((k_length, D.shape[0]))
        
    cl = np.sort(np.unique(target))
    cmat = np.zeros((k_length, len(cl), len(cl)))
    
    classes = target.copy()
    for idx, cur_class in enumerate(cl):
        # change labels to 0, 1, ..., len(cl)-1
        classes[target == cur_class] = idx
    
    cl = range(len(cl))
    
    # Classify each point in test set
    for i in test_set_ind:
        seed_class = classes[i]
        
        if issparse(D):
            row = D.getrow(i).toarray().ravel()
        else:
            row = D[i, :]
        row[i] = d_self
        
        # Sort points in training set according to distance
        # Randomize, in case there are several points of same distance
        # (this is especially relevant for SNN rescaling)
        rp = train_set_ind
        rp = np.random.permutation(rp)
        d2 = row[rp]
        d2idx = np.argsort(d2, axis=0)[::sort_order]
        idx = rp[d2idx]      
        
        # More than one k is useful for cheap multiple k-NN experiments at once
        for j in range(k_length):
            nn_class = classes[idx[0:k[j]]]
            cs = np.bincount(nn_class.astype(int))
            max_cs = np.where(cs == np.max(cs))[0]
            
            # "tie": use nearest neighbor
            if len(max_cs) > 1:
                if seed_class == nn_class[0]:
                    acc[j] += 1/n 
                    corr[j, i] = 1
                cmat[j, seed_class, nn_class[0]] += 1       
            # majority vote
            else:
                if cl[max_cs[0]] == seed_class:
                    acc[j] += 1/n
                    corr[j, i] = 1
                cmat[j, seed_class, cl[max_cs[0]]] += 1
                       
    if verbose:
        log.message("Finished k-NN experiment.")
        
    return acc, corr, cmat
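
A minimal usage sketch for the score() above (the data and variable names
are illustrative, not from the original library): build a symmetric
distance matrix and run LOO-CV for several k values at once.

import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.RandomState(0)
X = rng.rand(60, 4)                     # 60 points, 4 features
labels = rng.randint(0, 3, 60)          # 3 classes
D = squareform(pdist(X))                # 60 x 60 symmetric distance matrix
acc, corr, cmat = score(D, labels, k=[1, 5, 20], metric='distance')
print(acc.ravel())                      # one LOO-CV accuracy per k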
Code example #2
import numpy as np
from scipy.sparse import issparse
# `ConsoleLogging` and `io` are hub-toolbox internal helpers; the exact
# import paths may vary across hub-toolbox versions.
from hub_toolbox.htlogging import ConsoleLogging
from hub_toolbox import io


def predict(D:np.ndarray, target:np.ndarray, k=5,
            metric:str='distance', test_ind:np.ndarray=None, verbose:int=0,
            sample_idx=None, return_cmat=True):
    """Perform `k`-nearest neighbor classification.

    Use the ``n x n`` symmetric distance matrix `D` and target class
    labels `target` to perform a `k`-NN experiment (leave-one-out
    cross-validation or evaluation of test set; see parameter `test_ind`).
    Ties are broken by the nearest neighbor.

    Parameters
    ----------
    D : ndarray
        The ``n x n`` symmetric distance (similarity) matrix.

    target : ndarray (of dtype=int)
        The ``n x 1`` target class labels (ground truth) or
        ``n x c`` in case of ``c`` binarized multilabels

    k : int or array_like (of dtype=int), optional (default: 5)
        Neighborhood size for `k`-NN classification.
        For each value in `k`, one `k`-NN experiment is performed.

        HINT: Providing more than one value for `k` is a cheap means to perform
        multiple `k`-NN experiments at once. Try e.g. ``k=[1, 5, 20]``.

    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define whether matrix `D` is a distance or a similarity matrix.

    test_ind : ndarray, optional (default: None)
        Define data points to be held out as part of a test set. Can be:

        - None : Perform a LOO-CV experiment
        - ndarray : Hold out points indexed in this array as test set. Fit
          model to remaining data. Evaluate model on test set.

    verbose : int, optional (default: 0)
        Increasing level of output (progress report).

    return_cmat : bool, optional, default: True
        If False, only return the predictions `y_pred`.
        Otherwise also return the confusion matrices.

    Returns
    -------
    y_pred : ndarray (shape=(n_k, n, c), dtype=int)
        Predicted class labels (`n_k`... number of items in parameter `k`)
        
        HINT: Referring to the above example... 
        ... ``y_pred[0]`` gives the predictions of the ``k=1`` experiment.

    cmat : ndarray (shape=(n_k x c x n_t x n_t), dtype=int) 
        Confusion matrix (``n_t`` number of unique items in parameter target)

        HINT: ... ``cmat[2, 0, :, :]`` gives the confusion matrix of
        the first class in the ``k=20`` experiment in the following order:
            TN    FP
            FN    TP
    """

    # Check input sanity
    log = ConsoleLogging()
    if sample_idx is None:
        io.check_distance_matrix_shape(D)
    else:
        io.check_sample_shape_fits(D, sample_idx)
    #io._check_distance_matrix_shape_fits_labels(D, target)
    io.check_valid_metric_parameter(metric)
    if metric == 'distance':
        d_self = np.inf
        sort_order = 1
    elif metric == 'similarity':
        d_self = -np.inf
        sort_order = -1

    # Copy, because data is changed
    if not issparse(D):
        D = D.copy()
    target = target.astype(int)
    if target.ndim == 1:
        target = target[:, np.newaxis]
    if verbose:
        log.message("Start k-NN experiment.")
    # Handle LOO-CV vs. test set mode
    if test_ind is None:
        n = D.shape[0]
        test_set_ind = range(n)    # dummy
        train_set_ind = n   # dummy
    else:
        test_set_ind = test_ind
        # number of points to be classified
        n = test_set_ind.size
        # Indices of training examples (all points not in the test set)
        train_set_ind = np.setdiff1d(np.arange(D.shape[0]), test_set_ind)
        if sample_idx is not None:
            raise NotImplementedError("Sample k-NN does not support train/"
                                      "test splits at the moment.")
    # Number of k-NN parameters
    try:
        k_length = k.size
    except AttributeError as e:
        if isinstance(k, int):
            k = np.array([k])
            k_length = k.size
        elif isinstance(k, list):
            k = np.array(k)
            k_length = k.size
        else:
            raise e

    cl = np.sort(np.unique(target))
    cmat = np.zeros((k_length, target.shape[1], len(cl), len(cl)), dtype=int)
    y_pred = np.zeros((k_length, *target.shape), dtype=int)

    classes = target.copy()
    for idx, cur_class in enumerate(np.array(cl).ravel()):
        # change labels to 0, 1, ..., len(cl)-1
        classes[target == cur_class] = idx
    if sample_idx is not None:
        sample_classes = classes[sample_idx]
        # Mark the self-distance of each sampled object in the n x s matrix
        for col, sample in enumerate(sample_idx):
            D[sample, col] = d_self
    cl = range(len(cl))

    # Classify each point in test set
    for i in test_set_ind:
        if verbose and ((i+1)%1000==0 or i+1==n):
            log.message("Prediction: {} of {}.".format(i+1, n), flush=True)

        if issparse(D):
            row = D.getrow(i)
            ind = row.nonzero()[1]
            row = row.toarray().ravel()
        else:
            row = D[i, :]
        if sample_idx is None:
            row[i] = d_self

        # Sort points in training set according to distance
        # Randomize, in case there are several points of same distance
        # (this is especially relevant for SNN rescaling)
        if sample_idx is None:
            rp = train_set_ind
        else:
            if issparse(D):
                rp = ind
            else:
                rp = np.arange(len(sample_idx))
        rp = np.random.permutation(rp)
        d2 = row[rp]
        d2idx = np.argsort(d2, axis=0)[::sort_order]
        d2idx = d2idx[~np.isnan(d2[d2idx])] # filter NaN values
        idx = rp[d2idx]

        # More than one k is useful for cheap multiple k-NN experiments at once
        for j in range(k_length):
            # Make sure no inf/-inf/nan values are used for classification
            finite_val = np.isfinite(row[idx[0:k[j]]])
            # However, if no values are finite, classify randomly
            if finite_val.sum() == 0:
                idx = np.random.permutation(idx)
                finite_val = np.ones_like(finite_val)
                log.warning("Query was classified randomly, because all "
                            "distances were non-finite numbers.")
            for l in range(target.shape[1]):
                l_classes = classes[:, l]
                if sample_idx is None:
                    nn_class = l_classes[idx[0:k[j]]][finite_val]
                else:
                    l_sample_classes = sample_classes[:, l]
                    nn_class = l_sample_classes[idx[0:k[j]]][finite_val]
                cs = np.bincount(nn_class.astype(int))
                max_cs = np.where(cs == np.max(cs))[0]
                seed_class = classes[i, l]
                # "tie": use nearest neighbor
                if len(max_cs) > 1:
                    y_pred[j, i, l] = nn_class[0]
                    cmat[j, l, seed_class, nn_class[0]] += 1
                # majority vote
                else:
                    y_pred[j, i, l] = cl[max_cs[0]]
                    cmat[j, l, seed_class, cl[max_cs[0]]] += 1

    if verbose:
        log.message("Finished k-NN experiment.")

    if return_cmat:
        return y_pred, cmat
    else:
        return y_pred
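
A minimal usage sketch for predict() (hypothetical data; single-label
targets, so c = 1):

import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.RandomState(0)
X = rng.rand(60, 4)
labels = rng.randint(0, 3, 60)
D = squareform(pdist(X))
y_pred, cmat = predict(D, labels, k=[1, 5, 20], metric='distance')
print(y_pred.shape)                     # (3, 60, 1): n_k x n x c
print(cmat.shape)                       # (3, 1, 3, 3): n_k x c x n_t x n_t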
Code example #3
import numpy as np
from scipy.sparse import lil_matrix
# `_local_gamcdf` is a hub-toolbox internal helper (elementwise gamma CDF).


def _mutual_proximity_gammai_sparse(S: np.ndarray,
                                    min_nnz: int = 30,
                                    test_set_ind: np.ndarray = None,
                                    verbose: int = 0,
                                    log=None):
    """MP gammai for sparse similarity matrices. 
    
    Please do not directly use this function, but invoke via 
    mutual_proximity_gammai()
    """
    n = S.shape[0]
    self_value = 1.
    if test_set_ind is None:
        train_set_ind = slice(0, n)
    else:
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # mean, variance WITH zero values
    #=======================================================================
    # from sklearn.utils.sparsefuncs_fast import csr_mean_variance_axis0
    # mu, va = csr_mean_variance_axis0(self.S[train_set_mask])
    #=======================================================================

    # mean, variance WITHOUT zero values (missing values), ddof=1
    if S.diagonal().max() != 1. or S.diagonal().min() != 1.:
        raise ValueError("Self similarities must be 1.")
    S_param = S[train_set_ind]
    # the -1 accounts for self similarities that must be excluded from the calc
    mu = np.array((S_param.sum(0) - 1) / (S_param.getnnz(0) - 1)).ravel()
    E2 = mu**2
    X = S_param.copy()
    X.data **= 2
    n_x = (X.getnnz(0) - 1)
    E1 = np.array((X.sum(0) - 1) / (n_x)).ravel()
    del X
    # for an unbiased sample variance
    va = n_x / (n_x - 1) * (E1 - E2)
    del E1

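    # Method-of-moments estimates of the gamma distribution parameters:
    # shape A = E[x]^2 / Var[x], scale B = Var[x] / E[x]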
    A = E2 / va
    B = va / mu
    del mu, va, E2
    A[A < 0] = np.nan
    B[B <= 0] = np.nan

    S_mp = lil_matrix(S.shape, dtype=np.float32)
    nnz = S.getnnz(axis=1)  # nnz per row

    for i in range(n):
        if verbose and log and ((i + 1) % 1000 == 0 or i + 1 == n):
            log.message("MP_gammai: {} of {}".format(i + 1, n), flush=True)
        j_idx = slice(i + 1, n)

        Dij = S[i, j_idx].toarray().ravel()  #Extract dense rows temporarily
        tmp = np.empty(n - i)
        tmp[0] = self_value / 2.
        if nnz[i] <= min_nnz:
            tmp[1:] = np.nan
        else:
            p1 = _local_gamcdf(Dij, A[i], B[i])
            del Dij
            Dji = S[j_idx, i].toarray().ravel()  #for vectorization below.
            p2 = _local_gamcdf(Dji, A[j_idx], B[j_idx])
            del Dji
            tmp[1:] = (p1 * p2).ravel()
        # Write the row for ALL objects, so that NaN columns are set for
        # objects with too few neighbors (see note below).
        S_mp[i, i:] = tmp
        del tmp, j_idx
    S_mp += S_mp.T

    # Retain original distances for objects with too few neighbors.
    # That is, keep distances FROM these objects to others (rows), but
    # set distances of other objects TO them to NaN (columns).
    # Returned matrix is thus NOT SYMMETRIC.
    for row in np.argwhere(nnz <= min_nnz):
        row = row[0]  # use scalar for indexing instead of array
        S_mp[row, :] = S.getrow(row)
    return S_mp.tocsr()
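
This helper is not part of the public API (invoke via
mutual_proximity_gammai() instead), but a direct call can be sketched as
follows, assuming the internal _local_gamcdf is in scope; the random data
are purely illustrative (symmetric similarities with unit self similarity):

import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.RandomState(0)
S = rng.rand(100, 100).astype(np.float32)
S = (S + S.T) / 2.                      # symmetric similarities in (0, 1)
np.fill_diagonal(S, 1.)                 # self similarities must be 1
S_mp = _mutual_proximity_gammai_sparse(csr_matrix(S), min_nnz=30)
print(S_mp.shape, S_mp.dtype)           # (100, 100) float32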
Code example #4
import numpy as np
from scipy.sparse import issparse
# `ConsoleLogging` and `io` are hub-toolbox internal helpers; the exact
# import paths may vary across hub-toolbox versions.
from hub_toolbox.htlogging import ConsoleLogging
from hub_toolbox import io


def score(D:np.ndarray, target:np.ndarray, k=5,
          metric:str='distance', test_set_ind:np.ndarray=None, verbose:int=0,
          sample_idx=None, filter_self=True):
    """Perform `k`-nearest neighbor classification.

    Use the ``n x n`` symmetric distance matrix `D` and target class
    labels `target` to perform a `k`-NN experiment (leave-one-out
    cross-validation or evaluation of test set; see parameter `test_set_ind`).
    Ties are broken by the nearest neighbor.

    Parameters
    ----------
    D : ndarray
        The ``n x n`` symmetric distance (similarity) matrix.

    target : ndarray (of dtype=int)
        The ``n x 1`` target class labels (ground truth).

    k : int or array_like (of dtype=int), optional (default: 5)
        Neighborhood size for `k`-NN classification.
        For each value in `k`, one `k`-NN experiment is performed.

        HINT: Providing more than one value for `k` is a cheap means to perform
        multiple `k`-NN experiments at once. Try e.g. ``k=[1, 5, 20]``.

    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define whether matrix `D` is a distance or a similarity matrix.

    test_set_ind : ndarray, optional (default: None)
        Define data points to be held out as part of a test set. Can be:

        - None : Perform a LOO-CV experiment
        - ndarray : Hold out points indexed in this array as test set. Fit
          model to remaining data. Evaluate model on test set.

    verbose : int, optional (default: 0)
        Increasing level of output (progress report).

    sample_idx : ndarray, optional (default: None)
        If given, `D` is assumed to be an ``n x s`` matrix of distances
        (similarities) to ``s`` sampled objects, whose indices within the
        data set are listed in this array (sample-based k-NN).

    filter_self : bool, optional, default: True
        Remove self similarities from sparse ``D``.
        This assumes that the highest similarity per row is the self
        similarity.
        
        NOTE: Quadratic dense matrices are always filtered for self
        distances/similarities, even if `filter_self` is set to `False`.
        
    Returns
    -------
    acc : ndarray (shape=(n_k x 1), dtype=float)
        Classification accuracy (`n_k`... number of items in parameter `k`)

        HINT: Referring to the above example... 
        ... ``acc[0]`` gives the accuracy of the ``k=1`` experiment.
    corr : ndarray (shape=(n_k x n), dtype=int)
        Raw vectors of correctly classified items

        HINT: ... ``corr[1, :]`` gives these items for the ``k=5`` experiment.
    cmat : ndarray (shape=(n_k x n_t x n_t), dtype=int) 
        Confusion matrix (``n_t`` number of unique items in parameter target)

        HINT: ... ``cmat[2, :, :]`` gives the confusion matrix of
        the ``k=20`` experiment.
    """

    # Check input sanity
    log = ConsoleLogging()
    if sample_idx is None:
        io.check_distance_matrix_shape(D)
    else:
        io.check_sample_shape_fits(D, sample_idx)
    io.check_distance_matrix_shape_fits_labels(D, target)
    io.check_valid_metric_parameter(metric)
    if metric == 'distance':
        d_self = np.inf
        sort_order = 1
    elif metric == 'similarity':
        d_self = -np.inf
        sort_order = -1

    # Copy, because data is changed
    D = D.copy()
    target = target.astype(int)
    D_is_sparse = issparse(D)

    if verbose:
        log.message("Start k-NN experiment.")
    # Handle LOO-CV vs. test set mode
    if test_set_ind is None:
        n = D.shape[0]
        test_set_ind = range(n)    # dummy 
        train_set_ind = n   # dummy
    else:
        # number of points to be classified
        n = test_set_ind.size
        # Indices of training examples (all points not in the test set)
        train_set_ind = np.setdiff1d(np.arange(D.shape[0]), test_set_ind)
        if sample_idx is not None:
            raise NotImplementedError("Sample k-NN does not support train/"
                                      "test splits at the moment.")
    # Number of k-NN parameters
    try:
        k_length = k.size
    except AttributeError as e:
        if isinstance(k, int):
            k = np.array([k])
            k_length = k.size
        elif isinstance(k, list):
            k = np.array(k)
            k_length = k.size
        else:
            raise e

    acc = np.zeros((k_length, 1))
    corr = np.zeros((k_length, D.shape[0]))

    cl = np.sort(np.unique(target))
    if D_is_sparse:
        # Add a label for unknown class (object w/o nonzero sim to any others)
        cl = np.append(cl, cl.max()+1)
        n_classes = len(cl) + 1
    else:
        n_classes = len(cl)
    cmat = np.zeros((k_length, n_classes, n_classes))

    classes = target.copy()
    for idx, cur_class in enumerate(cl):
        # change labels to 0, 1, ..., len(cl)-1
        classes[target == cur_class] = idx
    if sample_idx is not None:
        sample_classes = classes[sample_idx]
        # Mark the self-distance of each sampled object in the n x s matrix
        for col, sample in enumerate(sample_idx):
            D[sample, col] = d_self
    cl = range(len(cl))

    rnd_classif = np.zeros(k_length)
    # Classify each point in test set
    for i in test_set_ind:
        if verbose and ((i+1)%1000==0 or i+1==n):
            log.message("Prediction: {} of {}.".format(i+1, n), flush=True)

        seed_class = classes[i]

        if D_is_sparse:
            row = D.getrow(i)
        else:
            row = D[i, :]
            if sample_idx is None:
                row[i] = d_self

        # Sort points in training set according to distance
        # Randomize, in case there are several points of same distance
        # (this is especially relevant for SNN rescaling)
        if sample_idx is None:
            rp = train_set_ind
        else:
            rp = np.arange(len(sample_idx))
        if D_is_sparse:
            nnz = row.nnz
            rp = np.random.permutation(nnz)
            d2 = row.data[rp]
            # Partition for each k value
            kth = nnz - k - 1
            # sort the two highest similarities to end
            kth = np.append(kth, [nnz-2, nnz-1])
            # Clip negative indices (nnz < k)
            np.clip(kth, a_min=0, a_max=nnz-1, out=kth)
            # Remove duplicate k values and sort
            kth = np.unique(kth)
            d2idx = np.argpartition(d2, kth=kth)
            d2idx = d2idx[~np.isnan(d2[d2idx])][::-1]
            idx = row.nonzero()[1][rp[d2idx]]
            idx = idx[1:]  # remove self similarity (assumed to be the row maximum)
        else:
            rp = np.random.permutation(rp)
            d2 = row[rp]
            d2idx = np.argsort(d2, axis=0)[::sort_order]
            d2idx = d2idx[~np.isnan(d2[d2idx])] # filter NaN values
            idx = rp[d2idx]

        # More than one k is useful for cheap multiple k-NN experiments at once
        for j in range(k_length):
            # Make sure no inf/-inf/nan values are used for classification
            if D_is_sparse:
                finite_val = np.isfinite(row[0, idx[0:k[j]]].toarray().ravel())
            else:
                finite_val = np.isfinite(row[idx[0:k[j]]])
            # However, if no values are finite, classify randomly
            if finite_val.sum() == 0:
                idx = np.random.permutation(idx)
                finite_val = np.ones_like(finite_val)
                rnd_classif[j] += 1
            if sample_idx is None:
                nn_class = classes[idx[0:k[j]]][finite_val]
            else:
                nn_class = sample_classes[idx[0:k[j]]][finite_val]
            cs = np.bincount(nn_class.astype(int))
            if cs.size > 0:
                max_cs = np.where(cs == np.max(cs))[0]
            else:
                max_cs = np.array([len(cl) - 1]) # misclassification label

            # "tie": use nearest neighbor
            if len(max_cs) > 1:
                if seed_class == nn_class[0]:
                    acc[j] += 1/n 
                    corr[j, i] = 1
                cmat[j, seed_class, nn_class[0]] += 1
            # majority vote
            else:
                if cl[max_cs[0]] == seed_class:
                    acc[j] += 1/n
                    corr[j, i] = 1
                cmat[j, seed_class, cl[max_cs[0]]] += 1

    if np.any(rnd_classif):
        for j, x in enumerate(rnd_classif):
            if x:
                log.warning(("{} queries were classified randomly in the "
                             "k={} experiment, because all distances were "
                             "non-finite numbers.").format(int(x), k[j]))
    if verbose:
        log.message("Finished k-NN experiment.")

    return acc, corr, cmat
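
A minimal usage sketch for the sparse-similarity path of this score()
variant (illustrative data; the unit diagonal makes the self similarity
the per-row maximum, as the self-similarity filtering assumes):

import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.RandomState(0)
S = rng.rand(60, 60).astype(np.float32)
S = (S + S.T) / 2.                      # symmetric similarities in (0, 1)
np.fill_diagonal(S, 1.)                 # self similarity = row maximum
labels = rng.randint(0, 3, 60)
acc, corr, cmat = score(csr_matrix(S), labels, k=5, metric='similarity')
print(acc.ravel())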
Code example #5
import numpy as np
from scipy.sparse import lil_matrix
# `_gumbelcdf` is a hub-toolbox internal helper (elementwise Gumbel CDF).


def _mutual_proximity_gumbel_sparse(S: np.ndarray,
                                    min_nnz: int = 30,
                                    test_set_ind: np.ndarray = None,
                                    verbose: int = 0,
                                    log=None):
    """MP Gumbel for sparse similarity matrices. 

    Please do not directly use this function, but invoke via 
    mutual_proximity_gumbel()
    """
    n = S.shape[0]
    self_value = 1.
    if test_set_ind is None:
        train_set_ind = slice(0, n)
    else:
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # mean, variance WITHOUT zero values (missing values), ddof=1
    if S.diagonal().max() != 1. or S.diagonal().min() != 1.:
        raise ValueError("Self similarities must be 1.")
    S_param = S[train_set_ind]
    # the -1 accounts for self similarities that must be excluded from the calc
    mu = np.array((S_param.sum(0) - 1) / (S_param.getnnz(0) - 1)).ravel()
    E2 = mu**2
    X = S_param.copy()
    X.data **= 2
    n_x = (X.getnnz(0) - 1)
    E1 = np.array((X.sum(0) - 1) / (n_x)).ravel()
    del X
    # for an unbiased sample variance
    va = n_x / (n_x - 1) * (E1 - E2)
    del E1, E2
    sd = np.sqrt(va)
    del va

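    # Method-of-moments estimates of the Gumbel parameters from mean and
    # standard deviation: scale beta_hat = sd * sqrt(6) / pi,
    # location mu_hat = mean - EULER_MASCHERONI * beta_hat.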
    # Euler-Mascheroni gamma=.57721566490153286 (https://oeis.org/A001620)
    EULER_MASCHERONI = np.euler_gamma
    beta_hat = sd * np.sqrt(6) / np.pi
    mu_hat = mu - EULER_MASCHERONI * beta_hat

    del mu, sd

    S_mp = lil_matrix(S.shape, dtype=np.float32)
    nnz = S.getnnz(axis=1)  # nnz per row

    for i in range(n):
        if verbose and log and ((i + 1) % 1000 == 0 or i + 1 == n):
            log.message("MP_gumbel: {} of {}".format(i + 1, n), flush=True)
        j_idx = slice(i + 1, n)

        Dij = S[i, j_idx].toarray().ravel()  #Extract dense rows temporarily
        tmp = np.empty(n - i)
        tmp[0] = self_value / 2.
        if nnz[i] <= min_nnz:
            tmp[1:] = np.nan
        else:  # Rescale iff there are enough neighbors for current point
            p1 = _gumbelcdf(Dij, mu_hat[i], beta_hat[i])
            p1[Dij == 0] = 0.
            del Dij
            Dji = S[j_idx, i].toarray().ravel()  #for vectorization below.
            p2 = _gumbelcdf(Dji, mu_hat[j_idx], beta_hat[j_idx])
            p2[Dji == 0] = 0.
            del Dji
            tmp[1:] = (p1 * p2).ravel()
        # Write the row for ALL objects, so that NaN columns are set for
        # objects with too few neighbors (see note below).
        S_mp[i, i:] = tmp
        del tmp, j_idx
    S_mp += S_mp.T

    # Retain original distances for objects with too few neighbors.
    # That is, keep distances FROM these objects to others (rows), but
    # set distances of other objects TO them to NaN (columns).
    # Returned matrix is thus NOT SYMMETRIC.
    for row in np.argwhere(nnz <= min_nnz):
        row = row[0]  # use scalar for indexing instead of array
        S_mp[row, :] = S.getrow(row)
    return S_mp.tocsr()
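
Analogous to the gammai example above, a direct-call sketch (normally
invoked via mutual_proximity_gumbel(); assumes the internal _gumbelcdf is
in scope):

import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.RandomState(0)
S = rng.rand(100, 100).astype(np.float32)
S = (S + S.T) / 2.                      # symmetric similarities in (0, 1)
np.fill_diagonal(S, 1.)                 # self similarities must be 1
S_mp = _mutual_proximity_gumbel_sparse(csr_matrix(S), min_nnz=30)
print(S_mp.nnz)                         # number of stored entries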