def load_dexter():
    """Load the dexter data set.

    .. note:: Deprecated in hub-toolbox 2.3
              Will be removed in hub-toolbox 3.0.
              Please use IO.load_dexter() instead.

    Returns
    -------
    Whatever ``IO.load_dexter()`` returns; the result is passed through
    unchanged.

    Warns
    -----
    DeprecationWarning
        Always, to point callers at ``IO.load_dexter()``.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this wrapper, so users can locate the call they need to migrate.
    warnings.warn(
        "load_dexter() is deprecated since hub-toolbox 2.3 and will be "
        "removed in hub-toolbox 3.0. Please use IO.load_dexter() instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return IO.load_dexter()
def __init__(self, D: np.ndarray = None, classes: np.ndarray = None,
             vectors: np.ndarray = None, metric: str = 'distance'):
    """Initialize a quick hubness analysis.

    Parameters
    ----------
    D : ndarray, optional (default: None)
        The n x n symmetric distance (similarity) matrix.
        Default: load example dataset (dexter).
    classes : ndarray, optional (default: None)
        The 1 x n class labels. Required for k-NN, GK.
    vectors : ndarray, optional (default: None)
        The m x n vector data. Required for IntrDim estimation.
    metric : {'distance', 'similarity'}
        Define whether `D` is a distance or similarity matrix.

    Notes
    -----
    When `D` is None, the example data set is loaded via
    ``IO.load_dexter()``; in that case `classes`, `vectors` and `metric`
    are ignored and the metric is set to ``'distance'``.

    Otherwise every supplied array is stored as an independent float64
    copy, so later changes to the caller's arrays do not affect this
    object.

    Attributes set here: ``D``, ``classes``, ``vectors``,
    ``has_class_data``, ``has_vector_data``, ``metric``, ``n``
    (``len(self.D)``) and ``experiments`` (an empty list).
    """

    def _float_copy(data):
        # Ensure correct type (not int16 etc.) and detach from the
        # caller's buffer. ``astype`` copies by default, so going through
        # ``np.asarray`` (no copy for ndarrays) yields exactly one copy
        # instead of the two made by ``np.copy(...).astype(...)``.
        return np.asarray(data).astype(np.float64)

    self.has_class_data, self.has_vector_data = False, False
    if D is None:
        print('\n'
              'NO PARAMETERS GIVEN! Loading & evaluating DEXTER data set.'
              '\n'
              'DEXTER is a text classification problem in a bag-of-word \n'
              'representation. This is a two-class classification problem\n'
              'with sparse continuous input variables. \n'
              'This dataset is one of five datasets of the NIPS 2003\n'
              'feature selection challenge.\n'
              'http://archive.ics.uci.edu/ml/datasets/Dexter\n')
        self.D, self.classes, self.vectors = IO.load_dexter()
        self.has_class_data, self.has_vector_data = True, True
        self.metric = 'distance'
    else:
        self.D = _float_copy(D)
        if classes is None:
            self.classes = None
        else:
            self.classes = _float_copy(classes)
            self.has_class_data = True
        if vectors is None:
            self.vectors = None
        else:
            self.vectors = _float_copy(vectors)
            self.has_vector_data = True
        self.metric = metric
    self.n = len(self.D)
    self.experiments = []