def test_dis_sim_local(self):
     """Test whether hubness and k-NN accuracy improve for dexter"""
     h_orig = hubness(self.distance)[0]
     acc_orig = score(self.distance, self.target)[0][0, 0]
     dist_dsl = dis_sim_local(self.vectors, k=50)
     h_dsl = hubness(dist_dsl)[0]
     acc_dsl = score(dist_dsl, self.target)[0][0, 0]
     result = (h_orig / h_dsl > 10) & (acc_dsl - acc_orig > 0.03)
     return self.assertTrue(result)
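 # The dexter-based tests above and below reference self.distance, self.target
 # and self.vectors, which are not shown here. A minimal sketch of the assumed
 # fixture, using load_dexter() as in the script further below (the attribute
 # names are taken from the tests; the fixture itself is an assumption):
 def setUp(self):
     """Load the dexter data set: distances, class labels, vectors."""
     self.distance, self.target, self.vectors = load_dexter()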
 def test_dis_sim_global(self):
     """Test whether hubness and k-NN accuracy improve for dexter"""
     h_orig = hubness(self.distance)[0]
     acc_orig = score(self.distance, self.target)[0][0, 0]
     dist_dsg = dis_sim_global(self.vectors)
     h_dsg = hubness(dist_dsg)[0]
     acc_dsg = score(dist_dsg, self.target)[0][0, 0]
     result = (h_orig / h_dsg > 2) & (acc_dsg - acc_orig > 0.07)
     return self.assertTrue(result)
 def test_localized_centering(self):
     """Test whether hubness and k-NN accuracy improve for dexter"""
     h_orig = hubness(self.distance)[0]
     acc_orig = score(self.distance, self.target)[0][0, 0]
     sim_lcent = localized_centering(self.vectors, "cosine", 20, 1)
     h_lcent = hubness(sim_lcent, metric="similarity")[0]
     acc_lcent = score(sim_lcent, self.target, metric="similarity")[0][0, 0]
     result = (h_orig / h_lcent > 1.5) & (acc_lcent - acc_orig > 0.03)
     return self.assertTrue(result)
 def test_ls_dist_equals_sim(self):
     """Test for equal RANKS using dist. vs. sim. (LS_dist != 1-LS_sim).
        Using hubness and k-NN accuracy as proxy."""
     self.setUpMod('rnd')
     ls_dist = local_scaling(self.dist, metric='distance')
     ls_sim = local_scaling(1 - self.dist, metric='similarity')
     h_dist, _, _ = hubness(ls_dist, metric='distance')
     h_sim, _, _ = hubness(ls_sim, metric='similarity')
     acc_dist, _, _ = score(ls_dist, self.label, metric='distance')
     acc_sim, _, _ = score(ls_sim, self.label, metric='similarity')
     dist_sim_equal_in_hubness_knn = np.allclose(h_dist, h_sim) and \
                                     np.allclose(acc_dist, acc_sim)
     return self.assertTrue(dist_sim_equal_in_hubness_knn)
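 # test_ls_dist_equals_sim calls self.setUpMod('rnd') and then uses self.dist
 # and self.label. A minimal sketch of such a helper (sizes, seed, and the use
 # of scipy.spatial.distance.pdist/squareform are assumptions; distances are
 # scaled to [0, 1] so that 1 - dist is a valid similarity, as the test needs):
 def setUpMod(self, mode='rnd'):
     if mode == 'rnd':
         np.random.seed(626)
         points = np.random.rand(100, 50)
         self.dist = squareform(pdist(points))
         self.dist /= self.dist.max()
         self.label = np.random.randint(0, 5, 100)
         self.n = self.dist.shape[0]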
Example #5
 def test_knn_score_equal_sklearn_loocv_score(self):
     acc, correct, cmat = \
         score(self.distance, self.label, k=5, metric='distance')
     # scoring only one k value, so take just the first elements:
     acc = acc[0, 0]
     correct = correct[0]
     cmat = cmat[0]
     # This should work too, but is much slower than using precomp. dist.
     #=======================================================================
     # knclassifier = KNeighborsClassifier(n_neighbors=5, algorithm='brute', 
     #                                     metric='cosine')
     #=======================================================================
     knclassifier = KNeighborsClassifier(n_neighbors=5, algorithm='brute', 
                                         metric='precomputed')
     n = self.distance.shape[0] # for LOO-CV
     loo_cv = LeaveOneOut(n)
     predicted_sklearn = cross_val_predict(
         knclassifier, self.distance, self.label, cv=loo_cv)
     acc_sklearn = accuracy_score(self.label, predicted_sklearn)
     if not np.allclose(acc, acc_sklearn):
         return self.assertAlmostEqual(acc, acc_sklearn, places=7)
     else:
         correct_sklearn = predicted_sklearn == self.label
         equal_prediction = np.all(correct == correct_sklearn)
         msg = """Accuracies of hub toolbox k-NN and sklearn-kNN are almost 
                  equal, but the predictions per data point are not."""
         return self.assertTrue(equal_prediction, msg)
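     # Note: LeaveOneOut(n) above is the pre-0.18 scikit-learn API from
     # sklearn.cross_validation. With sklearn.model_selection the splitter
     # takes no argument; a sketch of the equivalent call (same classifier,
     # labels, and precomputed distances as above):
     #=======================================================================
     # from sklearn.model_selection import LeaveOneOut, cross_val_predict
     # predicted_sklearn = cross_val_predict(
     #     knclassifier, self.distance, self.label, cv=LeaveOneOut())
     #=======================================================================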
Example #6
 def test_knn_score_matches_correct_prediction_fraction(self):
     k = np.array([1, 5, 20])
     acc, correct, _ = score(self.distance, self.label, k=k)
     acc_match = np.zeros_like(k, dtype=bool)
     for i, _ in enumerate(k):
         cur_acc = acc[i]
         cur_correct = correct[i]
         acc_match[i] = np.allclose(cur_acc, cur_correct.sum() / self.n)
     return self.assertTrue(np.all(acc_match))
Example #7
 def test_knn_score_matches_confusion_matrix(self):
     k = np.array([1, 5, 20])
     acc, _, cmat = score(self.distance, self.label, k=k)
     acc_match = np.zeros_like(k, dtype=bool)
     for i, _ in enumerate(k):
         cur_acc = acc[i]
         cur_cmat = cmat[i]
         TP = cur_cmat[0, 0]
         FN = cur_cmat[0, 1]
         FP = cur_cmat[1, 0]
         TN = cur_cmat[1, 1]
         acc_from_cmat = (TP + TN) / (TP + FN + FP + TN)
         acc_match[i] = np.allclose(cur_acc, acc_from_cmat)
     return self.assertTrue(np.all(acc_match))
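     # Sanity check by hand (made-up numbers, for illustration only): with
     # cur_cmat = [[40, 10], [5, 45]] the accuracy is (40 + 45) / 100 = 0.85,
     # i.e. the trace of the confusion matrix divided by its sum.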
  import numpy as np
  from scipy.sparse import csr_matrix, rand, triu
  from hub_toolbox.Hubness import hubness
  from hub_toolbox.HubnessAnalysis import load_dexter
  from hub_toolbox.KnnClassification import score
  # The script below also uses mutual_proximity_empiric; presumably:
  from hub_toolbox.MutualProximity import mutual_proximity_empiric
  #do = 'random'
  do = 'dexter'
  if do == 'random':
      print("RANDOM DATA:")
      print("------------")
      S = triu(rand(1000, 1000, 0.05, 'csr', np.float32, 43), 1)
      S += S.T
      D = 1. - S.toarray()
  elif do == 'dexter':
      print("DEXTER:")
      print("-------")
      D, c, v = load_dexter()
      acc_d, _, _ = score(D, c, [5], 'distance')
      S = csr_matrix(1 - D)
      acc_s, _, _ = score(S, c, [5], 'similarity')
 
  Sn_d, _, _ = hubness(D, 5, 'distance')
  Sn_s, _, _ = hubness(S, 5, 'similarity')
  print("Orig. dist. hubness:", Sn_d)
  print("Orig. sim.  hubness:", Sn_s)
  if do == 'dexter':
      print("Orig. dist. k-NN accuracy:", acc_d)
      print('Orig. sim.  k-NN accuracy:', acc_s)
      
  D_mp_emp_d = mutual_proximity_empiric(D)
  D_mp_emp_s = mutual_proximity_empiric(S, 'similarity')
  Sn_mp_emp_d, _, _ = hubness(D_mp_emp_d, 5)
  Sn_mp_emp_s, _, _ = hubness(D_mp_emp_s, 5, 'similarity')
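  # Presumably the script concludes by reporting the hubness after Mutual
  # Proximity rescaling, mirroring the prints above (assumed continuation):
  print("MP emp. dist. hubness:", Sn_mp_emp_d)
  print("MP emp. sim.  hubness:", Sn_mp_emp_s)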
 def _calc_knn_accuracy(self, k:int=5):
     """Calculate `k`-NN accuracy."""
     acc, _, _ = score(D=self.secondary_distance, target=self.classes,
                       k=k, metric=self.metric)
     self.knn_accuracy[k] = acc
     return self
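 # _calc_knn_accuracy is a method of an analysis object; a rough sketch of the
 # attributes it relies on (names taken from the method body, descriptions are
 # assumptions for illustration):
 #   self.secondary_distance  distance/similarity matrix after hubness reduction
 #   self.classes             array of class labels, one per data point
 #   self.metric              'distance' or 'similarity'
 #   self.knn_accuracy        dict-like store, filled with the accuracy for k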