def end_training(self):
    """Finish the current training pass and report the model dimensions.

    Runs ``end_training_1``, destroys and clears ``training_stepper``, sets
    ``executed_training`` to ``True`` and prints the prototype count and the
    vector length through ``transcripts``.
    """
    transcripts.printf_information('Ending training...')
    self.end_training_1()

    # The stepper is looked up only after end_training_1 has run, then
    # destroyed and dropped.
    stepper = self.training_stepper
    stepper.destroy()
    self.training_stepper = None
    self.executed_training = True
    transcripts.printf_information('Ended training.')

    # Each getter is called right before its own line is printed.
    report = (
        ('Prototypes == %d;', self.get_prototype_count),
        ('Vector length == %d;', self.get_vector_length),
    )
    for template, getter in report:
        transcripts.printf_output(template, getter())
def print_similarity_matrix(classifier, similarity):
    """Print the similarity matrix of ``classifier``, one row per line.

    The matrix is obtained from
    ``compute_similarity_matrix(classifier, similarity)``.  Every cell is
    rendered with four decimals and the cells of a row are joined with a
    single space.  All rows are rendered before the first one is printed.
    """
    rows = compute_similarity_matrix(classifier, similarity)
    rendered_rows = [
        ' '.join(["%.4f" % (cell) for cell in row])
        for row in rows
    ]
    for rendered in rendered_rows:
        transcripts.printf_output(rendered)
def statistic_1(self):
    """Compute and print per-class and overall entropy and purity.

    For every class in ``self.class_to_partition`` the partition counts are
    turned into an entropy (normalised by the logarithm of
    ``len(self.classes)``) and a purity (largest partition count divided by
    the class size).  The per-class values are stored in
    ``self.class_entropies`` / ``self.class_purities``; the size-weighted
    totals are stored in ``self.classes_entropy`` / ``self.classes_purity``.
    Everything is then printed through ``transcripts``, classes in sorted
    order.

    Edge cases handled explicitly:

    * with fewer than two classes ``log(len(classes))`` is zero (or
      undefined), so the normalised entropy is reported as ``0.0`` instead
      of raising ``ZeroDivisionError``;
    * a partition with a zero count contributes nothing to the entropy
      instead of raising a math domain error.
    """
    self.class_entropies = dict()
    self.classes_entropy = 0.0
    self.class_purities = dict()
    self.classes_purity = 0.0

    # Loop-invariant normaliser; 0.0 marks "cannot normalise".
    class_count = len(self.classes)
    log_class_count = math.log(class_count) if class_count > 1 else 0.0

    for clas, partitions in self.class_to_partition.items():
        size = self.classes_size[clas]
        entropy = 0.0
        max_count = 0
        for partition in partitions:
            count = partitions[partition]
            if count != 0:
                fraction = float(count) / float(size)
                entropy += fraction * math.log(fraction)
            if max_count < count:
                max_count = count
        if log_class_count:
            entropy *= -1.0 / log_class_count
        else:
            entropy = 0.0
        weight = float(size) / float(self.patterns)
        self.class_entropies[clas] = entropy
        self.classes_entropy += weight * entropy
        purity = 1.0 / float(size) * float(max_count)
        self.class_purities[clas] = purity
        self.classes_purity += weight * purity

    # sorted() works on both lists (Python 2) and dict views (Python 3);
    # the raw strings keep the literal backslash without relying on an
    # invalid escape sequence.
    transcripts.printf_output('Entropy == %.4f:', self.classes_entropy)
    class_entropies = sorted(self.class_entropies.items(), key=lambda pair: pair[0])
    for clas, entropy in class_entropies:
        transcripts.printf_output(r'\-- %s == %.4f;', clas, entropy)

    transcripts.printf_output('Purity == %.4f:', self.classes_purity)
    class_purities = sorted(self.class_purities.items(), key=lambda pair: pair[0])
    for clas, purity in class_purities:
        transcripts.printf_output(r'\-- %s == %.4f;', clas, purity)
def load_from_stream(self, stream):
    """Load prototypes from ``stream`` and install them on this object.

    The prototypes are read with ``vectors.input_``.  All of them must have
    the same length, which becomes ``self.vector_length``; when no
    prototypes are read the vector length is ``None``.  ``pre_load`` is
    called before and ``post_load`` after the attributes are assigned, and
    a short summary is printed through ``transcripts``.

    Raises:
        ValueError: if a prototype's length differs from the first one's.
    """
    prototypes = vectors.input_(stream)
    # len() rather than truthiness: the container type returned by
    # vectors.input_ is not visible here.
    if len(prototypes) > 0:
        vector_length = len(prototypes[0])
        for index, prototype in enumerate(prototypes):
            if vector_length != len(prototype):
                raise ValueError(
                    'Prototype %d has length %d, expected %d.'
                    % (index, len(prototype), vector_length))
    else:
        vector_length = None

    self.pre_load(prototypes, vector_length)
    self.prototypes = prototypes
    self.vector_length = vector_length
    self.post_load()

    transcripts.printf_information('Loaded from stream.')
    transcripts.printf_output('Prototypes == %d;', self.get_prototype_count())
    # NOTE(review): for an empty stream vector_length is None; confirm that
    # get_vector_length() / the '%d' format cope with that.
    transcripts.printf_output('Vector length == %d;', self.get_vector_length())
def end_training_1(self):
    """Rebuild the prototypes from the accumulated numerators/denominators.

    Each new prototype is ``numerator / denominator`` (a zero denominator is
    replaced by 1) scaled to unit magnitude.  When previous prototypes
    exist, the summed ``self.distance`` between old and new prototypes is
    stored in ``self.training_delta`` and printed; otherwise
    ``self.training_delta`` is ``None``.  Finally ``self.prototypes`` is
    replaced by the new list.
    """
    # range() instead of xrange(): identical iteration, and it also exists
    # on Python 3.
    indices = range(self.prototype_count)

    prototypes = list()
    for index in indices:
        numerator = self.prototype_numerators[index]
        denominator = self.prototype_denominators[index]
        if denominator == 0:
            denominator = 1
        prototype = numerator / denominator
        # A zero-magnitude prototype cannot be normalised; leave it as is
        # rather than dividing by zero.  (Presumably this is the same
        # situation the zero-denominator guard above is for -- confirm.)
        magnitude = vectors.magnitude(prototype)
        if magnitude != 0:
            prototype /= magnitude
        prototypes.append(prototype)

    if self.prototypes is not None:
        prototypes_delta = 0
        for index in indices:
            prototypes_delta += self.distance(self.prototypes[index], prototypes[index])
        self.training_delta = prototypes_delta
        transcripts.printf_output('Training delta == %.4f;', self.training_delta)
    else:
        self.training_delta = None

    self.prototypes = prototypes
def statistic_2(self):
    """Print how the patterns of every class are spread over partitions.

    Classes are listed in sorted order together with their size from
    ``self.classes_size``.  Under each class whose partition mapping is not
    ``None`` the partitions are listed by descending count.
    """
    transcripts.printf_output('Distribution:')
    # sorted() accepts both the list (Python 2) and the view (Python 3)
    # returned by items(); list.sort() on the result only works on Python 2.
    classes = sorted(self.class_to_partition.items(), key=lambda pair: pair[0])
    for clas, partitions in classes:
        # Raw strings keep the literal backslash without an invalid escape.
        transcripts.printf_output(r'\-- %s -> %d:', clas, self.classes_size[clas])
        if partitions is not None:
            ranked = sorted(partitions.items(), key=lambda pair: pair[1], reverse=True)
            for partition, count in ranked:
                transcripts.printf_output(r' \-- %s -> %d;', partition, count)
def end_validation_1(self):
    """Derive and print the pair-counting validation indices.

    From the pair counts ``ss``, ``sd``, ``ds``, ``dd`` and the total ``tt``
    this stores the four proportions (``ssp``, ``sdp``, ``dsp``, ``ddp``),
    ``rand_statistic``, ``jaccard_coefficient`` and ``folkes_mallows_index``
    on ``self`` and prints all of them through ``transcripts``.
    """
    ss, sd, ds, dd = self.ss, self.sd, self.ds, self.dd
    total = float(self.tt)

    # Share of each pair category among all pairs.
    self.ssp = float(ss) / total
    self.sdp = float(sd) / total
    self.dsp = float(ds) / total
    self.ddp = float(dd) / total

    agreeing_pairs = ss + dd
    counted_pairs = ss + sd + ds + dd
    self.rand_statistic = float(agreeing_pairs) / float(counted_pairs)
    self.jaccard_coefficient = float(ss) / float(ss + sd + ds)

    ss_over_same_class = float(ss) / float(ss + sd)
    ss_over_same_partition = float(ss) / float(ss + ds)
    self.folkes_mallows_index = math.sqrt(ss_over_same_class * ss_over_same_partition)

    pair_report = (
        ('SS (C1 == C2, P1 == P2) == %.4f, %d;', self.ssp, ss),
        ('SD (C1 == C2, P1 != P2) == %.4f, %d;', self.sdp, sd),
        ('DS (C1 != C2, P1 == P2) == %.4f, %d;', self.dsp, ds),
        ('DD (C1 != C2, P1 != P2) == %.4f, %d;', self.ddp, dd),
    )
    for template, proportion, count in pair_report:
        transcripts.printf_output(template, proportion, count)

    index_report = (
        ('Rand statistic == %.4f;', self.rand_statistic),
        ('Jaccard coefficient == %.4f;', self.jaccard_coefficient),
        ('Folkes and Mallows index == %.4f;', self.folkes_mallows_index),
    )
    for template, value in index_report:
        transcripts.printf_output(template, value)
def print_statistics(self):
    """Print the previously computed distance statistics.

    Four sections are printed through ``transcripts``: prototype-to-prototype
    distances and pattern-to-prototype distances (one entry per prototype
    identifier, in sorted identifier order), then the overall
    pattern-to-prototypes and pattern-to-center distances, and finally the
    PBM index.  Only attributes already stored on ``self`` are read.
    """
    transcripts.printf_output('Prototype to prototype distances:')
    # The sorted copy is what both per-prototype loops iterate, so the
    # report order does not depend on the container's own iteration order.
    prototype_identifiers = sorted(self.prototype_identifiers)
    for prototype_identifier in prototype_identifiers:
        transcripts.printf_output(r'\-- %s:', prototype_identifier)
        transcripts.printf_output(r' \-- sum == %.4f;', self.prototype_to_prototype_distance_sum[prototype_identifier])
        transcripts.printf_output(r' \-- avg == %.4f;', self.prototype_to_prototype_distance_avg[prototype_identifier])
        transcripts.printf_output(r' \-- min == %.4f;', self.prototype_to_prototype_distance_min[prototype_identifier])
        transcripts.printf_output(r' \-- max == %.4f;', self.prototype_to_prototype_distance_max[prototype_identifier])

    transcripts.printf_output('Pattern to prototype distances:')
    for prototype_identifier in prototype_identifiers:
        transcripts.printf_output(r'\-- %s:', prototype_identifier)
        transcripts.printf_output(r' \-- count == %d;', self.pattern_to_prototype_count[prototype_identifier])
        transcripts.printf_output(r' \-- sum == %.4f;', self.pattern_to_prototype_distance_sum[prototype_identifier])
        transcripts.printf_output(r' \-- avg == %.4f;', self.pattern_to_prototype_distance_avg[prototype_identifier])
        transcripts.printf_output(r' \-- min == %.4f;', self.pattern_to_prototype_distance_min[prototype_identifier])
        transcripts.printf_output(r' \-- max == %.4f;', self.pattern_to_prototype_distance_max[prototype_identifier])

    transcripts.printf_output('Pattern to prototypes distances:')
    transcripts.printf_output(r'\-- count == %d;', self.pattern_count)
    transcripts.printf_output(r'\-- sum == %.4f;', self.pattern_to_prototypes_distance_sum)
    transcripts.printf_output(r'\-- avg == %.4f;', self.pattern_to_prototypes_distance_avg)
    transcripts.printf_output(r'\-- min == %.4f;', self.pattern_to_prototypes_distance_min)
    transcripts.printf_output(r'\-- max == %.4f;', self.pattern_to_prototypes_distance_max)

    transcripts.printf_output('Pattern to center distances:')
    transcripts.printf_output(r'\-- count == %d;', self.pattern_count)
    transcripts.printf_output(r'\-- sum == %.4f;', self.pattern_to_center_distance_sum)
    transcripts.printf_output(r'\-- avg == %.4f;', self.pattern_to_center_distance_avg)
    transcripts.printf_output(r'\-- min == %.4f;', self.pattern_to_center_distance_min)
    transcripts.printf_output(r'\-- max == %.4f;', self.pattern_to_center_distance_max)

    transcripts.printf_output('PBM index == %.4f;', self.pbm_index)