def testMakeGroupsFromLabels(self):
    """Check that make_groups_from_labels groups items under their labels."""
    # FIXME: A stronger check would verify that each partition has ALL
    # necessary elements and that elements between each group in a
    # partition are DISTINCT.
    #
    # Each case is (labels, extra positional arguments, expected groups).
    cases = [
        (
            [0, 1, 0, 0, 1, 1],
            (),
            {0: ["A1", "A3", "A4"], 1: ["A2", "A5", "A6"]},
        ),
        (
            [1, 2, 3, 0, 1, 2],
            (),
            {0: ["A4"], 1: ["A1", "A5"], 2: ["A2", "A6"], 3: ["A3"]},
        ),
        (
            [0, 1, 2, 3, 4, 5],
            (),
            {0: ["A1"], 1: ["A2"], 2: ["A3"],
             3: ["A4"], 4: ["A5"], 5: ["A6"]},
        ),
        # Labels which are 2D points; here True is passed as the third
        # positional argument and the expected keys are tuples.
        (
            [[0, 0], [0, 0], [0, 1], [0, 0], [2, 2], [0, 1]],
            (True,),
            {(0, 0): ["A1", "A2", "A4"], (0, 1): ["A3", "A6"],
             (2, 2): ["A5"]},
        ),
    ]

    for labels, extra_args, expected in cases:
        groups = utils.make_groups_from_labels(labels, self.data2,
                                               *extra_args)
        self.assertEqual(groups, expected)
def testPricesDiffsVecsKmeansClustering(self):
    """Check that kmeans clustering of prices differences vectors works."""
    diff_vectors = utils.make_prices_diffs_vecs(self.data1)
    labels, _wcss, _n = Pycluster.kcluster(diff_vectors, 3, npass=100)
    clusters = utils.make_groups_from_labels(labels, self.data1)

    # The result should look like this, modulo group numbers. The original
    # author expected a different outcome to be very unlikely with
    # npass=100, but noted that the grouping can occasionally differ.
    # Every expected group is already listed in sorted order.
    expected_groups = [['E'], ['A', 'D'], ['B', 'C']]

    # Count how many (found, expected) pairs hold exactly the same members.
    num_matches = 0
    for found in clusters.values():
        found.sort()
        num_matches += sum(1 for expected in expected_groups
                           if found == expected)

    # Three matches together with exactly three clusters in the output
    # mean that the two groupings are equal.
    self.assertEqual(num_matches, 3)
    self.assertEqual(len(clusters), 3)
dist = dist_measure, npass = number_of_iters, method = dist_method) elif algorithm_type == ClusterAlg.HIERARCHICAL: tree = Pycluster.treecluster(input_vecs, method = dist_method, dist = dist_method) labels = tree.cut(number_of_clusters) elif algorithm_type == ClusterAlg.SELFORGMAPS: labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid, nygrid = ygrid, niter = number_of_iters) # If algorithm is self-organizing maps each item is assigned to # a particular 2D point, so we need to create groups from 2D points. # See implementation of making groups from labels for details. if algorithm_type == ClusterAlg.SELFORGMAPS: clusters = utils.make_groups_from_labels(labels, data, True) else: clusters = utils.make_groups_from_labels(labels, data) # Check with which type of key we have to deal with. # Any better idea how to check if object is a pair? :) keys_are_2D_points = True sample_key = clusters.keys()[0] try: a, b = sample_key except TypeError: keys_are_2D_points = False # Print output to file.