Example #1
0
def alg(groups):
	"""Refine ``groups`` in place until a full pass finds no misplaced instance.

	On every pass each group is inspected: every instance is matched
	(1-nearest-neighbour via ``knn.get_knn``) against the representants of
	all groups.  Instances whose nearest representant is not their own
	group's representant are considered misplaced, and one of three things
	happens to the group:

	* no misplaced instance: the group counts as stable for this pass;
	* every instance misplaced: the group is split along the direction
	  returned by ``hama.pca(..., 1)``; instances projecting further than
	  the representant move to a new group;
	* some instances misplaced: each one is moved to a new group when the
	  nearest group has a different class, otherwise into that nearest group.

	Passes repeat until the number of stable groups equals ``len(groups)``.

	groups -- list of group objects; it is mutated (groups are updated and
	          new groups may be appended) and is also the return value.

	Returns the same ``groups`` list object.
	"""
	# The original re-invoked itself for each extra pass; looping instead
	# keeps the same pass semantics without growing the call stack.
	while True:
		stable_count = 0

		# NOTE(review): ``groups`` can grow inside this loop (see the append
		# below), so groups created during a pass are visited in that same
		# pass.  Kept as in the original -- confirm this is intended.
		for group in groups:
			# Recomputed per group because earlier iterations update groups.
			all_representants = get_all_representants(groups)

			misplaced = []
			new_group = Group()

			for sample in group.get_instances():
				[nearest_rep] = knn.get_knn(1, sample, all_representants)

				if nearest_rep != group.get_representant():
					nearest_g = get_group_by_representant(nearest_rep, groups)
					misplaced.append((sample, nearest_g))

			if not misplaced:
				stable_count += 1

			elif len(misplaced) == group.count_instances():
				# Snapshot the instances: the loop below removes from the
				# group, which must not disturb the iteration.
				elements = list(group.get_instances())
				# The last element of each row is dropped before PCA
				# (presumably the class label -- TODO confirm).
				P, med = hama.pca([e[:-1] for e in elements], 1)
				prototype = group.get_representant()[:-1]
				[d] = hama.proj(P, prototype, med)

				for inst in elements:
					[di] = hama.proj(P, inst[:-1], med)
					if d < di:
						group.remove_instance(inst)
						new_group.add_instance(inst)

			else:
				for bad_instance, nearest_g in misplaced:
					if nearest_g.get_representant() != group.get_representant():
						if nearest_g.get_classe() != group.get_classe():
							# Different class: goes to the new group.
							group.remove_instance(bad_instance)
							new_group.add_instance(bad_instance)
						else:
							# Same class: hand it over to the nearest group.
							nearest_g.add_instance(bad_instance)
							group.remove_instance(bad_instance)

			if not new_group.is_empty():
				new_group.update_all()
				groups.append(new_group)

			update_all_groups(groups)

		if stable_count == len(groups):
			return groups
import numpy as np

if __name__ == '__main__':
	# Script entry point: compares k-NN accuracy on the raw features with
	# k-NN accuracy on the PCA-projected features and prints one row per k.

	database = 'glass'	# the name of the database you want to run
	components = 3		# the number of components you want to use in PCA

	training = load_database('databases/' + database + '.train')
	test = load_database('databases/' + database + '.test')

	# Each row is split into its leading values and its last element,
	# which is re-attached after projection (used here as the class).
	training_feats = [t[:-1] for t in training]
	training_class = [t[-1] for t in training]

	test_feats = [t[:-1] for t in test]
	test_class = [t[-1] for t in test]

	# The projection is computed from the training features only and then
	# applied to both sets.
	P, mn = pca(training_feats, components)

	pca_training = proj(P, training_feats, mn).tolist()
	pca_training = [a + [b] for (a,b) in zip(pca_training, training_class)]

	pca_test = proj(P, test_feats, mn).tolist()
	pca_test = [a + [b] for (a,b) in zip(pca_test, test_class)]

	# Single-argument parenthesised print: same output on Python 2, and
	# valid syntax on Python 3 as well.
	print('\tK\tKNN\tPCA')
	for k in [1,3,5]:
		knn_rate = get_acertion_tax(k, test, training)
		pca_rate = get_acertion_tax(k, pca_test, pca_training)
		print('\t%d	%.2f	%.2f' % (k, knn_rate, pca_rate))