def _calc_secondary_distance(self):
    """Compute the secondary distance and store it on the instance.

    The function registered in ``SEC_DIST`` under
    ``self.secondary_distance_type`` is first called with the original
    distance matrix (``D=``) and ``metric=``. If that call raises
    ``TypeError`` (e.g. centering has no keyword ``D=``), the function is
    called with the vector data (``X=``) instead:

    * ``'cent'``, ``'wcent'``: the result is passed to ``cosine_distance``.
    * ``'lcent'``: the result is subtracted from 1 (presumably it is a
      similarity -- verify against the ``SEC_DIST`` implementation).
    * ``'dsg'``, ``'dsl'``: the result is stored unchanged.

    Returns
    -------
    self
        The instance, with ``secondary_distance`` set.

    Raises
    ------
    ValueError
        If the distance-based call raised ``TypeError`` and the type is
        none of the vector-based ones listed above.
    """
    kind = self.secondary_distance_type
    transform = SEC_DIST[kind]
    try:
        result = transform(D=self.original_distance, metric=self.metric)
    except TypeError:
        # NOTE: any TypeError from the call above ends up here, not only
        # the one caused by the missing 'D=' keyword.
        if kind in ('cent', 'wcent'):
            result = cosine_distance(transform(X=self.vectors))
        elif kind == 'lcent':
            result = 1. - transform(X=self.vectors)
        elif kind in ('dsg', 'dsl'):
            result = transform(X=self.vectors)
        else:
            raise ValueError(
                "Erroneous secondary distance type: {}".format(kind))
    self.secondary_distance = result
    return self
Example #2
0
def load_dexter():
    """Load the example data set (dexter).

    The label file ``dexter_train.labels`` and the data file
    ``dexter_train.data`` are read from the ``example_datasets``
    directory located next to this module.

    Returns
    -------
    D : ndarray
        Distance matrix, computed as ``cosine_distance(vectors)``.
    classes : ndarray
        Class label vector, as returned by ``np.loadtxt`` for the
        label file.
    vectors : ndarray
        Vector data matrix of shape (300, 20000).
    """
    n = 300
    dim = 20000
    # Build paths with os.path.join instead of hard-coded '/' separators.
    data_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'example_datasets')

    # Read class labels
    classes = np.loadtxt(os.path.join(data_dir, 'dexter_train.labels'))

    # Read data: every line holds whitespace-separated 'col:val' pairs.
    vectors = np.zeros((n, dim))
    with open(os.path.join(data_dir, 'dexter_train.data'), mode='r') as fid:
        # Stream the file line by line. Blank lines are NOT skipped: each
        # line is one row, so an empty line leaves an all-zero row and the
        # rows stay aligned with the label file.
        for row, line in enumerate(fid):
            for word in line.split():
                col, val = word.split(':')
                # Column indices in the file are shifted down by one,
                # i.e. they are treated as 1-based.
                vectors[row, int(col) - 1] = int(val)

    # Calc distance
    D = cosine_distance(vectors)
    return D, classes, vectors
 def test_cosine_dist_equal_to_scipy_pdist_cos(self):
     """cosine_distance must agree with SciPy's pairwise cosine distances.

     The SciPy reference is the condensed ``pdist`` output expanded to a
     square matrix with ``squareform``; agreement is checked with
     ``np.allclose`` (default tolerances).
     """
     actual = cosine_distance(self.vectors)
     reference = squareform(pdist(self.vectors, 'cosine'))
     return self.assertTrue(np.allclose(actual, reference))