def test_row_distance(self):
    """Check ``row_distance`` for the rows "Polona" and "Rajko".

    The test passes when the returned distance is close (absolute
    tolerance 1e-2) to at least one of the accepted reference values.
    """
    # Accepted reference results: euclidean, Manhattan.
    accepted = [175.803, 453.0]

    clustering = HierarchicalClustering(self.data)
    measured = clustering.row_distance("Polona", "Rajko")

    # Element-wise comparison against every accepted value.
    matches = np.isclose(measured, accepted, atol=1e-2)
    self.assertTrue(matches.any())
def test_run(self):
    """Check that ``run`` produces one of the accepted cluster trees.

    ``row_distance`` is replaced on the instance by a Euclidean distance
    between the ``self.data`` entries looked up by the two given keys, so
    the outcome does not depend on the class's own row metric.
    """
    def euclidean_between(a, b):
        return distance.euclidean(self.data[a], self.data[b])

    clustering = HierarchicalClustering(self.data)
    clustering.row_distance = euclidean_between
    clustering.run()

    # Stops at the first reference tree that matches.
    self.assertTrue(
        any(
            compare_trees(clustering.clusters, expected)
            for expected in (CLUSTER_AVG_MAX, CLUSTER_MIN)
        )
    )
def test_cluster_distance(self):
    """Check ``cluster_distance`` on two fixed nested clusters.

    ``row_distance`` is replaced on the instance by a Euclidean distance
    between the ``self.data`` entries looked up by the two given keys. The
    test passes when the result is close (absolute tolerance 1e-2) to at
    least one of the accepted reference values.
    """
    def euclidean_between(a, b):
        return distance.euclidean(self.data[a], self.data[b])

    clustering = HierarchicalClustering(self.data)
    clustering.row_distance = euclidean_between

    # Clusters are nested lists whose leaves are single-name lists.
    first_cluster = [["Albert"], [["Branka"], ["Cene"]]]
    second_cluster = [["Nika"], ["Polona"]]

    # NOTE(review): presumably one reference value per linkage variant -
    # confirm against the HierarchicalClustering implementation.
    accepted = [124.99, 165.86, 75.94]

    measured = clustering.cluster_distance(first_cluster, second_cluster)
    matches = np.isclose(measured, accepted, atol=1e-2)
    self.assertTrue(matches.any())
import matplotlib.pyplot as plt # Analysis of the results of hierarchical clustering ######################## # Author: Jernej Vivod # ######################## DATA_FILE = "eurovision-final.csv" # Read data. labels = get_labels( DATA_FILE ) # Get labels (2xn matrix) that maps each country in first column to its region # in the second column hc = HierarchicalClustering( read_file(DATA_FILE) ) # Create a HierarchicalClustering instance initialized with parsed data. # Get groups and create a dictionary where index of groups maps to its members hc.get_groups(11) hc.extract_group_members() ## (1) ## For each group in hc.groups compute how many points it gave to every country. ## points_to_countries = dict() # Go over group indices. for group_index in hc.groups.keys(): sum_points = np.zeros( 47, dtype=int ) # Create empty vector for computing the cumulative sums of points for each country.