Example #1
0
	def testMakePricesDifferencesVectors(self):
		"""Testing whether making prices differences vectors works."""

		# Fixture shared by the test case (defined outside this method).
		expected_for_data1 = [[-0.5, 1.5, 10.0], [7.0, 1.0, 1.0]]
		self.assertEqual(utils.make_prices_diffs_vecs(self.data1),
				expected_for_data1)

		# Two labelled series whose two recorded values are both 1, so the
		# single difference computed for each of them is expected to be 0.
		constant_series = [["A", {0: 1, 1: 1}], ["B", {0: 1, 1: 1}]]
		expected_for_constant = [[0], [0]]
		self.assertEqual(utils.make_prices_diffs_vecs(constant_series),
				expected_for_constant)
Example #2
0
	def testPricesDiffsVecsKmeansClustering(self):
		"""Testing whether kmeans clustering with prices differences
		   vectors works.

		   The prices differences vectors built from self.data1 are clustered
		   into 3 groups and the resulting grouping is compared, ignoring the
		   group numbers, with the expected one.

		   NOTE: k-means starts from random assignments, so this test is
		   probabilistic. With npass=100 a different grouping is believed to
		   be very unlikely, but it can still happen."""

		prices_diffs_vecs = utils.make_prices_diffs_vecs(self.data1)
		# Only the labels are used; the other two returned values are ignored.
		labels, _, _ = Pycluster.kcluster(prices_diffs_vecs, 3, npass=100)
		clusters = utils.make_groups_from_labels(labels, self.data1)

		# Expected grouping, written in canonical form: every group sorted
		# and the groups themselves sorted, so group numbers do not matter.
		expected_groups = [['A', 'D'], ['B', 'C'], ['E']]

		# Bring the output to the same canonical form. sorted() builds new
		# lists, so the lists held by the clusters dict are left untouched.
		actual_groups = sorted(sorted(group) for group in clusters.values())

		self.assertEqual(len(clusters), 3)
		# A direct comparison (instead of counting pairwise matches) also
		# rejects output in which two groups equal the same expected group,
		# and shows both groupings when the test fails.
		self.assertEqual(actual_groups, expected_groups)
Example #3
0
	except IOError, err:
		sys.exit(err)

	# TODO(patryk): plot events.

	# Preprocessing phase.

	if compress_to_weekly_data:
		data = utils.compress_data_weekly(data)		

	if trimming_range > 0:
		data = eventutils.trim_data_to_events(data, events, trimming_range)

	input_vecs = []
	if treat_data_differentially:
		input_vecs = utils.make_prices_diffs_vecs(data)
	else:
		input_vecs = utils.make_prices_vecs(data)

	# Run clustering algorithm.

	if algorithm_type == ClusterAlg.KMEANS:
		labels, wcss, n = Pycluster.kcluster(input_vecs, number_of_clusters, 
				dist = dist_measure, npass = number_of_iters, 
				method = dist_method)
	elif algorithm_type == ClusterAlg.HIERARCHICAL:
		tree = Pycluster.treecluster(input_vecs, method = dist_method,
				dist = dist_method)
		labels = tree.cut(number_of_clusters)
	elif algorithm_type == ClusterAlg.SELFORGMAPS:
		labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid,