def mp_gaussi(self, train_set_mask=None, verbose=False, sample_size=0, n_jobs=-1):
    """Rescale ``self.D`` with Mutual Proximity (Gaussi variant).

    .. note:: Deprecated in hub-toolbox 2.3
              Class will be removed in hub-toolbox 3.0.
              Please use static functions instead.

    Thin wrapper that derives the arguments of the module-level function
    ``mutual_proximity_gaussi`` from the instance state (``self.D``,
    ``self.isSimilarityMatrix``, ``self.mv``) and forwards the call.

    Parameters
    ----------
    train_set_mask : array-like, optional (default: None)
        Training points. Despite the name, the value is handed to
        ``np.setdiff1d`` against all row indices of ``self.D``, i.e. it is
        treated as a collection of training *indices*; every remaining
        index forms the test set. ``None`` means no test set is held out.
    verbose : bool or int, optional (default: False)
        Forwarded unchanged as the verbosity level.
    sample_size : int, optional (default: 0)
        Forwarded unchanged.
    n_jobs : int, optional (default: -1)
        Forwarded unchanged.

    Returns
    -------
    The return value of ``mutual_proximity_gaussi``.
    """
    metric = 'similarity' if self.isSimilarityMatrix else 'distance'

    # Always bind test_set_ind: previously it was left undefined when no
    # training mask was given, so the default call could not succeed.
    if train_set_mask is None:
        test_set_ind = None
    else:
        test_set_ind = np.setdiff1d(np.arange(self.D.shape[0]),
                                    train_set_mask)

    return mutual_proximity_gaussi(self.D, metric, sample_size,
                                   test_set_ind, verbose, n_jobs, self.mv)
def mutual_proximity_gaussi(D:np.ndarray, metric:str='distance',
                            sample_size:int=0, test_set_ind:np.ndarray=None,
                            verbose:int=0, n_jobs:int=-1, mv=None):
    """Transform a distance matrix with Mutual Proximity (indep. normal distr.).

    Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix.
    Gaussi variant assumes independent normal distributions (FAST).
    The resulting secondary distance/similarity matrix should show lower
    hubness.

    Sparse input is handled by ``_mutual_proximity_gaussi_sparse``; any other
    input is delegated to the sequential implementation in
    ``hub_toolbox.MutualProximity`` after a warning has been logged.

    Parameters
    ----------
    D : ndarray or csr_matrix
        - ndarray: The ``n x n`` symmetric distance or similarity matrix.
        - csr_matrix: The ``n x n`` symmetric similarity matrix.

    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define, whether matrix `D` is a distance or similarity matrix.

        NOTE: In case of sparse `D`, only 'similarity' is supported
        (`metric` is validated but not forwarded to the sparse code path).

    sample_size : int, optional (default: 0)
        Define sample size from which Gauss parameters are estimated.
        Use all data when set to ``0``.

    test_set_ind : ndarray, optional (default: None)
        Define data points to be hold out as part of a test set. Can be:

        - None : Rescale all distances
        - ndarray : Hold out points indexed in this array as test set.

    verbose : int, optional (default: 0)
        Increasing level of output (progress report).

    n_jobs : int, optional (default: -1)
        Number of parallel processes to be used.

        NOTE: set ``n_jobs=-1`` to use all CPUs. Only forwarded for sparse
        `D`; the dense code path does not receive it.

    mv : optional (default: None)
        Forwarded to the sparse implementation only; its meaning is not
        visible in this function -- presumably a missing-value marker,
        TODO confirm against ``_mutual_proximity_gaussi_sparse``.

    Returns
    -------
    D_mp : ndarray
        Secondary distance MP gaussi matrix. For sparse `D`, whatever the
        sparse implementation returns.

    References
    ----------
    .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012).
           Local and global scaling reduce hubs in space. The Journal of
           Machine Learning Research, 13(1), 2871–2902.
    """
    # Initialization
    n_points = D.shape[0]
    logger = Logging.ConsoleLogging()
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)

    # DO NOT DELETE comment
    #===========================================================================
    # # Checking input
    # if D.shape[0] != D.shape[1]:
    #     raise TypeError("Distance/similarity matrix is not quadratic.")
    # if metric == 'similarity':
    #     self_value = 1
    # elif metric == 'distance':
    #     self_value = 0
    # else:
    #     raise ValueError("Parameter metric must be 'distance' or 'similarity'.")
    #===========================================================================

    # Training points are all points that are not held out for testing.
    train_set_ind = (slice(0, n_points) if test_set_ind is None
                     else np.setdiff1d(np.arange(n_points), test_set_ind))

    #===========================================================================
    # # Start MP Gaussi
    # if verbose:
    #     log.message('Mutual Proximity Gaussi rescaling started.', flush=True)
    # D = D.copy()
    #===========================================================================

    # Sparse matrices take the parallel code path.
    if issparse(D):
        return _mutual_proximity_gaussi_sparse(
            D, sample_size, train_set_ind, verbose, logger, mv, n_jobs)

    # Dense matrices: warn, then fall back to the sequential implementation,
    # which is imported lazily under an alias so it does not shadow this
    # function's own name.
    logger.warning("MP gaussi does not support parallel execution for dense "
                   "matrices at the moment. Continuing with 1 process.")
    from hub_toolbox.MutualProximity import (
        mutual_proximity_gaussi as _sequential_mp_gaussi)
    return _sequential_mp_gaussi(D, metric, sample_size, test_set_ind, verbose)
# Baseline k-NN accuracy on the original distance / similarity matrices.
print("Orig. dist. k-NN accuracy:", acc_d)
print('Orig. sim. k-NN accuracy:', acc_s)

# --- Mutual Proximity, empiric variant ---
D_mp_emp_d = mutual_proximity_empiric(D)
D_mp_emp_s = mutual_proximity_empiric(S, 'similarity')
Sn_mp_emp_d, _, _ = hubness(D_mp_emp_d, 5)
Sn_mp_emp_s, _, _ = hubness(D_mp_emp_s, 5, 'similarity')
print("MP emp dist. hubness:", Sn_mp_emp_d)
print("MP emp sim. hubness:", Sn_mp_emp_s)
# k-NN accuracy is only evaluated in the 'dexter' configuration.
if do == 'dexter':
    acc_mp_emp_d, _, _ = score(D_mp_emp_d, c, [5], 'distance')
    acc_mp_emp_s, _, _ = score(D_mp_emp_s, c, [5], 'similarity')
    print("MP emp dist. k-NN accuracy:", acc_mp_emp_d)
    print("MP emp sim. k-NN accuracy:", acc_mp_emp_s)

# --- Mutual Proximity, Gaussi variant ---
D_mp_gaussi_d = mutual_proximity_gaussi(D)
D_mp_gaussi_s = mutual_proximity_gaussi(S, 'similarity')
Sn_mp_gaussi_d, _, _ = hubness(D_mp_gaussi_d, 5)
Sn_mp_gaussi_s, _, _ = hubness(D_mp_gaussi_s, 5, 'similarity')
print("MP gaussi dist. hubness:", Sn_mp_gaussi_d)
print("MP gaussi sim. hubness:", Sn_mp_gaussi_s)
if do == 'dexter':
    acc_mp_gaussi_d, _, _ = score(D_mp_gaussi_d, c, [5], 'distance')
    acc_mp_gaussi_s, _, _ = score(D_mp_gaussi_s, c, [5], 'similarity')
    # Labels corrected: these are the Gaussi results (were printed as
    # "MP gammai", a copy-paste slip from the section below).
    print("MP gaussi dist. k-NN accuracy:", acc_mp_gaussi_d)
    print("MP gaussi sim. k-NN accuracy:", acc_mp_gaussi_s)

# --- Mutual Proximity, Gammai variant ---
D_mp_gammai_d = mutual_proximity_gammai(D, 'distance')
D_mp_gammai_s = mutual_proximity_gammai(S, 'similarity')
Sn_mp_gammai_d, _, _ = hubness(D_mp_gammai_d, 5, 'distance')
Sn_mp_gammai_s, _, _ = hubness(D_mp_gammai_s, 5, 'similarity')