def testMakePricesDifferencesVectors(self):
    """Check the output of utils.make_prices_diffs_vecs on two inputs.

    First the shared ``self.data1`` fixture is converted and compared with
    its known difference vectors; then a small inline data set, in which
    every entry maps keys 0 and 1 to the same value, is expected to give a
    single zero per entry.
    """
    expected_for_fixture = [[-0.5, 1.5, 10.0], [7.0, 1.0, 1.0]]
    self.assertEqual(utils.make_prices_diffs_vecs(self.data1),
                     expected_for_fixture)

    # Two entries with identical values under both keys.
    data3 = [["A", {0: 1, 1: 1}], ["B", {0: 1, 1: 1}]]
    expected_for_constant = [[0], [0]]
    self.assertEqual(utils.make_prices_diffs_vecs(data3),
                     expected_for_constant)
def testPricesDiffsVecsKmeansClustering(self):
    """Testing whether kmeans clustering with prices differences vectors works.

    The ``self.data1`` fixture is turned into price-difference vectors,
    clustered into 3 groups with Pycluster.kcluster, and the resulting
    groups are compared with the expected ones regardless of which group
    number each one received.
    """
    diff_vectors = utils.make_prices_diffs_vecs(self.data1)
    # Only the labels are used; the other two returned values are ignored.
    labels, _wcss, _n = Pycluster.kcluster(diff_vectors, 3, npass=100)
    clusters = utils.make_groups_from_labels(labels, self.data1)

    # Expected grouping, up to the numbering of the groups. With npass=100
    # the original author considered any other outcome very unlikely, but
    # it is not impossible, so this test can occasionally fail.
    suggested_clusters = {0: ['E'], 1: ['A', 'D'], 2: ['B', 'C']}
    expected_groups = [sorted(names) for names in suggested_clusters.values()]

    # Count how many produced groups coincide with an expected group. The
    # expected groups are pairwise distinct, so each produced group adds at
    # most one to the total.
    num_matches = 0
    for members in clusters.values():
        members.sort()
        num_matches += expected_groups.count(members)

    # Three matches together with exactly three produced groups means the
    # produced grouping and the expected grouping are the same.
    self.assertEqual(num_matches, 3)
    self.assertEqual(len(clusters), 3)
except IOError, err: sys.exit(err) # TODO(patryk): plot events. # Preprocessing phase. if compress_to_weekly_data: data = utils.compress_data_weekly(data) if trimming_range > 0: data = eventutils.trim_data_to_events(data, events, trimming_range) input_vecs = [] if treat_data_differentially: input_vecs = utils.make_prices_diffs_vecs(data) else: input_vecs = utils.make_prices_vecs(data) # Run clustering algorithm. if algorithm_type == ClusterAlg.KMEANS: labels, wcss, n = Pycluster.kcluster(input_vecs, number_of_clusters, dist = dist_measure, npass = number_of_iters, method = dist_method) elif algorithm_type == ClusterAlg.HIERARCHICAL: tree = Pycluster.treecluster(input_vecs, method = dist_method, dist = dist_method) labels = tree.cut(number_of_clusters) elif algorithm_type == ClusterAlg.SELFORGMAPS: labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid,