Esempio n. 1
0
    def test_row_distance(self):
        """Check the "Polona"-"Rajko" row distance against the accepted values."""
        clustering = HierarchicalClustering(self.data)
        measured = clustering.row_distance("Polona", "Rajko")

        # Two reference results are accepted: Euclidean and Manhattan distance.
        accepted = [175.803, 453.0]
        matches = np.isclose(measured, accepted, atol=1e-2)

        # The test passes as soon as the result agrees with one reference value.
        self.assertTrue(matches.any())
Esempio n. 2
0
    def test_run(self):
        """Run the clustering with a Euclidean row distance and check the tree."""
        clustering = HierarchicalClustering(self.data)

        # Replace the instance's row distance with a Euclidean distance over
        # self.data before running.
        def euclidean_rows(a, b):
            return distance.euclidean(self.data[a], self.data[b])

        clustering.row_distance = euclidean_rows
        clustering.run()

        # Either reference tree is an acceptable outcome.
        matches_reference = (
            compare_trees(clustering.clusters, CLUSTER_AVG_MAX)
            or compare_trees(clustering.clusters, CLUSTER_MIN)
        )
        self.assertTrue(matches_reference)
Esempio n. 3
0
    def test_cluster_distance(self):
        """Check the distance between two fixed clusters against accepted values."""
        clustering = HierarchicalClustering(self.data)

        # Replace the instance's row distance with a Euclidean distance over
        # self.data.
        def euclidean_rows(a, b):
            return distance.euclidean(self.data[a], self.data[b])

        clustering.row_distance = euclidean_rows

        # Nested lists describe the two clusters being compared.
        first_cluster = [["Albert"], [["Branka"], ["Cene"]]]
        second_cluster = [["Nika"], ["Polona"]]
        measured = clustering.cluster_distance(first_cluster, second_cluster)

        # Any one of the reference distances counts as a correct result.
        accepted = [124.99, 165.86, 75.94]
        matches = np.isclose(measured, accepted, atol=1e-2)

        self.assertTrue(matches.any())
Esempio n. 4
0
import matplotlib.pyplot as plt

# Analysis of the results of hierarchical clustering

########################
# Author: Jernej Vivod #
########################

# Name of the CSV file that is passed to both get_labels and read_file below.
DATA_FILE = "eurovision-final.csv"

# Get labels (2xn matrix) that map each country in the first column to its
# region in the second column.
labels = get_labels(
    DATA_FILE
)

# Create a HierarchicalClustering instance initialized with the parsed data.
hc = HierarchicalClustering(
    read_file(DATA_FILE)
)

# Get groups and create a dictionary where the index of each group maps to its
# members.
# NOTE(review): 11 is presumably the requested number of groups - confirm
# against HierarchicalClustering.get_groups.
hc.get_groups(11)
hc.extract_group_members()

## (1) ## For each group in hc.groups compute how many points it gave to every country. ##

# Result container for step (1).
# NOTE(review): presumably keyed by group index - confirm against the loop below.
points_to_countries = dict()

# Go over group indices.
for group_index in hc.groups.keys():
    sum_points = np.zeros(
        47, dtype=int
    )  # Create empty vector for computing the cumulative sums of points for each country.