def end_training (self) :
		"""Finish a training run and report the resulting model size.

		Calls self.end_training_1 (), destroys and drops the training
		stepper, flags the training as executed, and writes the prototype
		count and the vector length to the transcript.
		"""
		transcripts.printf_information ('Ending training...')
		# end_training_1 () runs while the training stepper still exists.
		self.end_training_1 ()
		# Destroy the stepper before dropping the reference to it.
		self.training_stepper.destroy ()
		self.training_stepper = None
		self.executed_training = True
		transcripts.printf_information ('Ended training.')
		transcripts.printf_output ('Prototypes    == %d;', self.get_prototype_count ())
		transcripts.printf_output ('Vector length == %d;', self.get_vector_length ())
def print_similarity_matrix (classifier, similarity) :
	"""Print the similarity matrix of a classifier, one row per output line.

	The matrix is obtained from compute_similarity_matrix (classifier,
	similarity).  Every entry is formatted with four decimals and the
	entries of a row are joined with single spaces.
	"""
	rows = compute_similarity_matrix (classifier, similarity)
	# Format the whole matrix before emitting anything, so that a formatting
	# failure happens before the first line is written.
	rendered_rows = list ()
	for row in rows :
		rendered_rows.append (' '.join ("%.4f" % (value) for value in row))
	for rendered_row in rendered_rows :
		transcripts.printf_output (rendered_row)
	def statistic_1 (self) :
		"""Compute and print the entropy and purity of the classes.

		For every class in self.class_to_partition this stores:
		  * in self.class_entropies, the entropy of the class's counts over
		    its partitions, divided by the logarithm of len (self.classes);
		  * in self.class_purities, the largest partition count divided by
		    the class size (self.classes_size[clas]).
		The sums of these values, each weighted by class size over
		self.patterns, are stored in self.classes_entropy and
		self.classes_purity.  Everything is then written to the transcript,
		with the classes ordered by their key.

		A partition whose count is zero contributes nothing to the entropy
		(the usual 0 * log (0) == 0 convention).
		"""
		self.class_entropies = dict ()
		self.classes_entropy = 0.0
		self.class_purities = dict ()
		self.classes_purity = 0.0
		for clas, partitions in self.class_to_partition.items () :
			size = self.classes_size[clas]
			entropy = 0.0
			max_count = 0
			for partition in partitions :
				count = partitions[partition]
				if count != 0 :
					# math.log (0) is undefined; skipping keeps 0 * log (0) == 0.
					share = float (count) / float (size)
					entropy += share * math.log (share)
				if max_count < count :
					max_count = count
			# NOTE(review): math.log (1) is 0.0, so when self.classes holds a
			# single entry this division raises ZeroDivisionError -- confirm
			# whether that case can occur and what value it should produce.
			entropy *= -1.0 / math.log (len (self.classes))
			self.class_entropies[clas] = entropy
			weight = float (size) / float (self.patterns)
			self.classes_entropy += weight * entropy
			purity = 1.0 / float (size) * float (max_count)
			self.class_purities[clas] = purity
			self.classes_purity += weight * purity
		# sorted () works on both the list (Python 2) and the view (Python 3)
		# returned by dict.items (); the literals below escape the backslash
		# explicitly, the emitted text is unchanged.
		transcripts.printf_output ('Entropy == %.4f:', self.classes_entropy)
		for clas, entropy in sorted (self.class_entropies.items (), key = lambda pair : pair[0]) :
			transcripts.printf_output ('\\-- %s == %.4f;', clas, entropy)
		transcripts.printf_output ('Purity == %.4f:', self.classes_purity)
		for clas, purity in sorted (self.class_purities.items (), key = lambda pair : pair[0]) :
			transcripts.printf_output ('\\-- %s == %.4f;', clas, purity)
	def load_from_stream (self, stream) :
		"""Load the prototypes from a stream and report what was loaded.

		The prototypes are read with vectors.input_ (stream) and must all
		have the same length as the first one; that length becomes
		self.vector_length (None when no prototype was read).
		self.pre_load (prototypes, vector_length) is called before the
		attributes are assigned and self.post_load () afterwards.

		Raises Exception when a prototype's length differs from the length
		of the first prototype.
		"""
		prototypes = vectors.input_ (stream)
		if len (prototypes) > 0 :
			vector_length = len (prototypes[0])
			for index, prototype in enumerate (prototypes) :
				if vector_length != len (prototype) :
					raise Exception (
							'prototype %d has length %d, expected %d'
							% (index, len (prototype), vector_length))
		else :
			vector_length = None
		self.pre_load (prototypes, vector_length)
		self.prototypes = prototypes
		self.vector_length = vector_length
		self.post_load ()
		transcripts.printf_information ('Loaded from stream.')
		transcripts.printf_output ('Prototypes    == %d;', self.get_prototype_count ())
		# NOTE(review): with no prototypes vector_length is None; if
		# get_vector_length () returns it unchanged, '%d' cannot format it --
		# confirm how the empty case should be reported.
		transcripts.printf_output ('Vector length == %d;', self.get_vector_length ())
Exemple #5
0
	def end_training_1 (self) :
		"""Rebuild self.prototypes from the accumulated numerators and denominators.

		For each index below self.prototype_count the new prototype is
		self.prototype_numerators[index] divided by
		self.prototype_denominators[index] (a zero denominator is replaced
		by 1), then divided by its own vectors.magnitude ().

		When self.prototypes already holds prototypes, the sum of
		self.distance (old, new) over all indices is stored in
		self.training_delta and printed; otherwise self.training_delta is
		set to None.  Finally self.prototypes is replaced by the new list.
		"""
		prototypes = list ()
		# range () exists on both Python 2 and 3, xrange () only on Python 2.
		for index in range (self.prototype_count) :
			numerator = self.prototype_numerators[index]
			denominator = self.prototype_denominators[index]
			if denominator == 0 :
				# Avoid dividing by zero.
				denominator = 1
			prototype = numerator / denominator
			# NOTE(review): nothing guards against vectors.magnitude ()
			# returning zero here (e.g. for an all-zero numerator) -- confirm
			# whether that can happen.
			prototype /= vectors.magnitude (prototype)
			prototypes.append (prototype)
		if self.prototypes is not None :
			prototypes_delta = 0
			for index in range (self.prototype_count) :
				prototypes_delta += self.distance (self.prototypes[index], prototypes[index])
			self.training_delta = prototypes_delta
			transcripts.printf_output ('Training delta == %.4f;', self.training_delta)
		else :
			self.training_delta = None
		self.prototypes = prototypes
	def statistic_2 (self) :
		"""Print the size of every class and its counts per partition.

		Classes are listed in the order of their key, each with
		self.classes_size[clas].  For a class whose entry in
		self.class_to_partition is not None, its partitions follow, ordered
		by decreasing count.
		"""
		transcripts.printf_output ('Distribution:')
		# sorted () works on both the list (Python 2) and the view (Python 3)
		# returned by dict.items (); the literals below escape the backslash
		# explicitly, the emitted text is unchanged.
		for clas, partitions in sorted (self.class_to_partition.items (), key = lambda pair : pair[0]) :
			transcripts.printf_output ('\\-- %s -> %d:', clas, self.classes_size[clas])
			if partitions is None :
				continue
			for partition, count in sorted (partitions.items (), key = lambda pair : pair[1], reverse = True) :
				transcripts.printf_output ('   \\-- %s -> %d;', partition, count)
	def end_validation_1 (self) :
		"""Derive the validation indices from the pair counts and print them.

		Reads the counters self.ss, self.sd, self.ds, self.dd and self.tt and
		stores:
		  * self.ssp, self.sdp, self.dsp, self.ddp -- each counter over self.tt;
		  * self.rand_statistic -- (ss + dd) / (ss + sd + ds + dd);
		  * self.jaccard_coefficient -- ss / (ss + sd + ds);
		  * self.folkes_mallows_index -- sqrt (ss / (ss + sd) * ss / (ss + ds)).
		Any of these divisions raises ZeroDivisionError when its denominator
		is zero.
		"""
		def ratio (numerator, denominator) :
			# Both operands are converted to float before the division.
			return float (numerator) / float (denominator)
		self.ssp = ratio (self.ss, self.tt)
		self.sdp = ratio (self.sd, self.tt)
		self.dsp = ratio (self.ds, self.tt)
		self.ddp = ratio (self.dd, self.tt)
		self.rand_statistic = ratio (self.ss + self.dd, self.ss + self.sd + self.ds + self.dd)
		self.jaccard_coefficient = ratio (self.ss, self.ss + self.sd + self.ds)
		self.folkes_mallows_index = math.sqrt (
				ratio (self.ss, self.ss + self.sd) * ratio (self.ss, self.ss + self.ds))
		report = (
			('SS (C1 == C2, P1 == P2)  == %.4f, %d;', self.ssp, self.ss),
			('SD (C1 == C2, P1 != P2)  == %.4f, %d;', self.sdp, self.sd),
			('DS (C1 != C2, P1 == P2)  == %.4f, %d;', self.dsp, self.ds),
			('DD (C1 != C2, P1 != P2)  == %.4f, %d;', self.ddp, self.dd),
			('Rand statistic           == %.4f;', self.rand_statistic),
			('Jaccard coefficient      == %.4f;', self.jaccard_coefficient),
			('Folkes and Mallows index == %.4f;', self.folkes_mallows_index),
		)
		for entry in report :
			transcripts.printf_output (*entry)
	def print_statistics (self) :
		"""Print the stored distance statistics and the PBM index.

		Writes, in this order: the prototype-to-prototype distances and the
		pattern-to-prototype distances of every prototype identifier (in
		sorted identifier order), the overall pattern-to-prototypes and
		pattern-to-center distances, and self.pbm_index.  All values are
		read from attributes of self; nothing is computed here.
		"""
		transcripts.printf_output ('Prototype to prototype distances:')
		# Both per-prototype sections iterate this sorted copy, so the output
		# order does not depend on the iteration order of
		# self.prototype_identifiers.
		prototype_identifiers = sorted (self.prototype_identifiers)
		# The literals below escape the backslash explicitly; the emitted
		# text is unchanged.
		for prototype_identifier in prototype_identifiers :
			transcripts.printf_output ('\\-- %s:', prototype_identifier)
			transcripts.printf_output ('   \\-- sum == %.4f;', self.prototype_to_prototype_distance_sum[prototype_identifier])
			transcripts.printf_output ('   \\-- avg == %.4f;', self.prototype_to_prototype_distance_avg[prototype_identifier])
			transcripts.printf_output ('   \\-- min == %.4f;', self.prototype_to_prototype_distance_min[prototype_identifier])
			transcripts.printf_output ('   \\-- max == %.4f;', self.prototype_to_prototype_distance_max[prototype_identifier])
		transcripts.printf_output ('Pattern to prototype distances:')
		for prototype_identifier in prototype_identifiers :
			transcripts.printf_output ('\\-- %s:', prototype_identifier)
			transcripts.printf_output ('   \\-- count == %d;', self.pattern_to_prototype_count[prototype_identifier])
			transcripts.printf_output ('   \\-- sum   == %.4f;', self.pattern_to_prototype_distance_sum[prototype_identifier])
			transcripts.printf_output ('   \\-- avg   == %.4f;', self.pattern_to_prototype_distance_avg[prototype_identifier])
			transcripts.printf_output ('   \\-- min   == %.4f;', self.pattern_to_prototype_distance_min[prototype_identifier])
			transcripts.printf_output ('   \\-- max   == %.4f;', self.pattern_to_prototype_distance_max[prototype_identifier])
		transcripts.printf_output ('Pattern to prototypes distances:')
		transcripts.printf_output ('\\-- count == %d;', self.pattern_count)
		transcripts.printf_output ('\\-- sum   == %.4f;', self.pattern_to_prototypes_distance_sum)
		transcripts.printf_output ('\\-- avg   == %.4f;', self.pattern_to_prototypes_distance_avg)
		transcripts.printf_output ('\\-- min   == %.4f;', self.pattern_to_prototypes_distance_min)
		transcripts.printf_output ('\\-- max   == %.4f;', self.pattern_to_prototypes_distance_max)
		transcripts.printf_output ('Pattern to center distances:')
		transcripts.printf_output ('\\-- count == %d;', self.pattern_count)
		transcripts.printf_output ('\\-- sum == %.4f;', self.pattern_to_center_distance_sum)
		transcripts.printf_output ('\\-- avg == %.4f;', self.pattern_to_center_distance_avg)
		transcripts.printf_output ('\\-- min == %.4f;', self.pattern_to_center_distance_min)
		transcripts.printf_output ('\\-- max == %.4f;', self.pattern_to_center_distance_max)
		transcripts.printf_output ('PBM index == %.4f;', self.pbm_index)