def mp_gammai(self, train_set_mask=None, verbose=False, n_jobs=-1):
    """Apply Mutual Proximity (independent Gamma variant) to ``self.D``.

    .. note:: Deprecated in hub-toolbox 2.3
              Class will be removed in hub-toolbox 3.0.
              Please use static functions instead.

    Thin wrapper that derives the arguments from the instance state and
    forwards them to :func:`mutual_proximity_gammai`.

    Parameters
    ----------
    train_set_mask : ndarray, optional (default: None)
        Values to be removed from ``arange(self.D.shape[0])`` (i.e. it is
        used as an array of training indices); the remaining indices are
        forwarded as the test set. If None, ``test_set_ind=None`` is
        forwarded.
    verbose : bool or int, optional (default: False)
        Forwarded unchanged.
    n_jobs : int, optional (default: -1)
        Forwarded unchanged.

    Returns
    -------
    The return value of :func:`mutual_proximity_gammai`.
    """
    metric = 'similarity' if self.isSimilarityMatrix else 'distance'

    if train_set_mask is None:
        # Without this branch the name stayed unbound and the default call
        # (no training mask) failed with UnboundLocalError at the return.
        test_set_ind = None
    else:
        test_set_ind = np.setdiff1d(np.arange(self.D.shape[0]),
                                    train_set_mask)

    return mutual_proximity_gammai(self.D, metric, test_set_ind,
                                   verbose, n_jobs, self.mv)
def mutual_proximity_gammai(D: np.ndarray, metric: str = 'distance',
                            test_set_ind: np.ndarray = None,
                            verbose: int = 0, n_jobs: int = -1, mv=None):
    """Transform a distance matrix with Mutual Proximity (indep. Gamma distr.).

    Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix.
    Gammai variant assumes independent Gamma distributed distances (FAST).
    The resulting secondary distance/similarity matrix should show lower
    hubness.

    Sparse input is handed to ``_mutual_proximity_gammai_sparse``; dense
    input is delegated to the implementation in
    ``hub_toolbox.MutualProximity`` after a warning is logged.

    Parameters
    ----------
    D : ndarray or csr_matrix
        - ndarray: The ``n x n`` symmetric distance or similarity matrix.
        - csr_matrix: The ``n x n`` symmetric similarity matrix.

    metric : {'distance', 'similarity'}, optional (default: 'distance')
        Define, whether matrix `D` is a distance or similarity matrix.

        NOTE: In case of sparse `D`, only 'similarity' is supported.

    test_set_ind : ndarray, optional (default: None)
        Define data points to be hold out as part of a test set. Can be:

        - None : Rescale all distances
        - ndarray : Hold out points indexed in this array as test set.

    verbose : int, optional (default: 0)
        Increasing level of output (progress report).

    n_jobs : int, optional (default: -1)
        Number of parallel processes to be used. Only forwarded for
        sparse `D`; the dense path does not receive it.

        NOTE: set ``n_jobs=-1`` to use all CPUs

    mv : optional (default: None)
        Forwarded to the sparse implementation only; not used for
        dense `D`.

    Returns
    -------
    D_mp : ndarray
        Secondary distance MP gammai matrix (the value returned by the
        implementation the call was dispatched to).

    References
    ----------
    .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012).
           Local and global scaling reduce hubs in space. The Journal of
           Machine Learning Research, 13(1), 2871–2902.
    """
    logger = Logging.ConsoleLogging()
    IO._check_distance_matrix_shape(D)
    IO._check_valid_metric_parameter(metric)

    n_points = D.shape[0]
    sample_size = 0  # not implemented
    # Training indices: everything when no test set is given, otherwise
    # the complement of the test indices.
    train_set_ind = (slice(0, n_points) if test_set_ind is None
                     else np.setdiff1d(np.arange(n_points), test_set_ind))

    if not issparse(D):
        logger.warning("MP gammai does not support parallel execution for "
                       "dense matrices at the moment. "
                       "Continuing with 1 process.")
        # Local import: the delegate has the same name as this function,
        # so it is aliased to keep the two apart.
        from hub_toolbox.MutualProximity import (
            mutual_proximity_gammai as _serial_mp_gammai)
        return _serial_mp_gammai(D, metric, test_set_ind, verbose)

    return _mutual_proximity_gammai_sparse(D, sample_size, train_set_ind,
                                           verbose, logger, mv, n_jobs)
print("MP emp sim. hubness:", Sn_mp_emp_s)
# k-NN accuracy is only evaluated when `do == 'dexter'`.
if do == 'dexter':
    acc_mp_emp_d, _, _ = score(D_mp_emp_d, c, [5], 'distance')
    acc_mp_emp_s, _, _ = score(D_mp_emp_s, c, [5], 'similarity')
    print("MP emp dist. k-NN accuracy:", acc_mp_emp_d)
    print("MP emp sim. k-NN accuracy:", acc_mp_emp_s)

# --- MP gaussi ---
D_mp_gaussi_d = mutual_proximity_gaussi(D)
D_mp_gaussi_s = mutual_proximity_gaussi(S, 'similarity')
Sn_mp_gaussi_d, _, _ = hubness(D_mp_gaussi_d, 5)
Sn_mp_gaussi_s, _, _ = hubness(D_mp_gaussi_s, 5, 'similarity')
print("MP gaussi dist. hubness:", Sn_mp_gaussi_d)
print("MP gaussi sim. hubness:", Sn_mp_gaussi_s)
if do == 'dexter':
    acc_mp_gaussi_d, _, _ = score(D_mp_gaussi_d, c, [5], 'distance')
    acc_mp_gaussi_s, _, _ = score(D_mp_gaussi_s, c, [5], 'similarity')
    # These report the gaussi accuracies; the labels previously read
    # "MP gammai", colliding with the gammai output further down.
    print("MP gaussi dist. k-NN accuracy:", acc_mp_gaussi_d)
    print("MP gaussi sim. k-NN accuracy:", acc_mp_gaussi_s)

# --- MP gammai ---
D_mp_gammai_d = mutual_proximity_gammai(D, 'distance')
D_mp_gammai_s = mutual_proximity_gammai(S, 'similarity')
Sn_mp_gammai_d, _, _ = hubness(D_mp_gammai_d, 5, 'distance')
Sn_mp_gammai_s, _, _ = hubness(D_mp_gammai_s, 5, 'similarity')
print("MP gammai dist. hubness:", Sn_mp_gammai_d)
print("MP gammai sim. hubness:", Sn_mp_gammai_s)
if do == 'dexter':
    acc_mp_gammai_d, _, _ = score(D_mp_gammai_d, c, [5], 'distance')
    acc_mp_gammai_s, _, _ = score(D_mp_gammai_s, c, [5], 'similarity')
    print("MP gammai dist. k-NN accuracy:", acc_mp_gammai_d)
    print("MP gammai sim. k-NN accuracy:", acc_mp_gammai_s)