Esempio n. 1
0
	def testMakeGroupsFromLabels(self):
		"""Test if we group right.

		Every case passes a list of labels together with self.data2 to
		utils.make_groups_from_labels and compares the returned mapping
		against the expected {label: [item names]} dictionary.
		"""

		# FIXME: This can be done somewhat better by checking if
		# each partition has ALL necessary elements and if elements
		# between each group in partition are DISTINCT.

		# Each entry: (labels, extra positional arguments, expected groups).
		cases = [
			# Two labels, three items each.
			(
				[0, 1, 0, 0, 1, 1],
				(),
				{0: ["A1", "A3", "A4"], 1: ["A2", "A5", "A6"]},
			),
			# Four labels with groups of different sizes.
			(
				[1, 2, 3, 0, 1, 2],
				(),
				{0: ["A4"], 1: ["A1", "A5"], 2: ["A2", "A6"], 3: ["A3"]},
			),
			# Every item carries its own label.
			(
				[0, 1, 2, 3, 4, 5],
				(),
				{0: ["A1"], 1: ["A2"], 2: ["A3"],
					3: ["A4"], 4: ["A5"], 5: ["A6"]},
			),
			# Labels which are 2D points: True is passed as the third
			# argument and the expected keys are tuples.
			(
				[[0, 0], [0, 0], [0, 1], [0, 0], [2, 2], [0, 1]],
				(True,),
				{(0, 0): ["A1", "A2", "A4"],
					(0, 1): ["A3", "A6"], (2, 2): ["A5"]},
			),
		]

		for labels, extra_args, expected_groups in cases:
			groups = utils.make_groups_from_labels(labels, self.data2,
					*extra_args)
			self.assertEqual(groups, expected_groups)
Esempio n. 2
0
	def testPricesDiffsVecsKmeansClustering(self):
		"""Testing whether kmeans clustering with prices differences
		   vectors works.

		Builds the prices differences vectors from self.data1, clusters
		them into 3 groups with Pycluster.kcluster and checks that the
		resulting partition equals the suggested one, ignoring both the
		group numbers and the order of items inside each group.
		"""

		prices_diffs_vecs = utils.make_prices_diffs_vecs(self.data1)

		# kcluster returns three values; only the labels are needed here.
		labels, _wcss, _n = Pycluster.kcluster(prices_diffs_vecs, 3, npass=100)
		clusters = utils.make_groups_from_labels(labels, self.data1)

		# The result should be sth like this modulo group numbers. Probability
		# that this isn't like this with npass=100 is (I think) very low! But
		# it can happen that this grouping will be different.

		suggested_clusters = {0: ['E'], 1: ['A', 'D'], 2: ['B', 'C']}

		def canonical(groups):
			# Drop the group numbers and sort both the members of every
			# group and the groups themselves, so two partitions compare
			# equal exactly when they contain the same groups. sorted()
			# works on copies, so the returned clusters are not modified.
			return sorted(sorted(members) for members in groups.values())

		# Comparing canonical forms is stricter than counting pairwise
		# matches: a result that repeats one suggested group three times
		# would produce three matches but is rejected here.

		self.assertEqual(len(clusters), 3)
		self.assertEqual(canonical(clusters), canonical(suggested_clusters))
Esempio n. 3
0
				dist = dist_measure, npass = number_of_iters, 
				method = dist_method)
	elif algorithm_type == ClusterAlg.HIERARCHICAL:
		tree = Pycluster.treecluster(input_vecs, method = dist_method,
				dist = dist_method)
		labels = tree.cut(number_of_clusters)
	elif algorithm_type == ClusterAlg.SELFORGMAPS:
		labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid, 
				nygrid = ygrid, niter = number_of_iters)

	# If algorithm is self-organizing maps each item is assigned to
	# a particular 2D point, so we need to create groups from 2D points.
	# See implementation of making groups from labels for details.

	if algorithm_type == ClusterAlg.SELFORGMAPS:
		clusters = utils.make_groups_from_labels(labels, data, True)
	else:
		clusters = utils.make_groups_from_labels(labels, data)

	# Determine which type of key we are dealing with.
	# Is there a better way to check whether an object is a pair? :)

	keys_are_2D_points = True
	sample_key = clusters.keys()[0]
	try:
		a, b = sample_key
	except TypeError:
		keys_are_2D_points = False	

	# Print output to file.