def store_to_file (self, path) :
	"""Serialize this object to the file at `path`, overwriting any existing file.

	Delegates the actual serialization to `self.store_to_stream`.
	"""
	transcripts.printf_information ('Storing data to file [%s]...', path)
	if os.path.exists (path) :
		transcripts.printf_warning ('File does exist; overwriting.')
	# `open` replaces the `file` builtin (removed in Python 3); the `with`
	# block guarantees the stream is closed even if `store_to_stream` raises,
	# which the original explicit `close` did not.
	with open (path, 'wb', 512 * 1024) as stream :
		self.store_to_stream (stream)
def execute_training_1 (self, pattern) :
	"""Run one ART-style competitive training step for `pattern`.

	Prototypes whose match with the pattern exceeds `self.vigilance` compete
	by activation; the single winner is adjusted towards the pattern (or the
	pattern itself becomes a new prototype when nothing matches).  Prototypes
	whose magnitude collapses to (almost) zero are pruned afterwards.
	"""
	winner = None
	winner_activation = None
	losers = []    # matched the vigilance test but lost the activation race
	rejected = []  # failed the vigilance test; carried over untouched
	for candidate in self.prototypes :
		if self.match (candidate, pattern) > self.vigilance :
			score = self.activation (candidate, pattern)
			if winner is None :
				winner, winner_activation = candidate, score
			elif winner_activation < score :
				# demote the previous winner; strictly-greater score wins
				losers.append (winner)
				winner, winner_activation = candidate, score
			else :
				losers.append (candidate)
		else :
			rejected.append (candidate)
	if winner is None :
		# nothing matched: the pattern itself seeds a new prototype
		winner = pattern
	survivors = [self.adjust (winner, pattern, self.learning)]
	survivors.extend (losers)
	survivors.extend (rejected)
	retained = []
	for candidate in survivors :
		if vectors.magnitude (candidate) <= 0.000000001 :
			transcripts.printf_warning ('Prototype vector is almost 0; pruning.')
		else :
			retained.append (candidate)
	self.prototypes = retained
	self.prototype_stepper.update (len (self.prototypes))
def load_from_file (self, path) :
	"""Deserialize this object from the file at `path`; no-op if it is missing.

	Delegates the actual deserialization to `self.load_from_stream`.
	"""
	transcripts.printf_information ('Loading data from file [%s]...', path)
	if not os.path.exists (path) :
		transcripts.printf_warning ('File does not exist; aborting.')
		return
	# `open` replaces the `file` builtin (removed in Python 3); the `with`
	# block guarantees the stream is closed even if `load_from_stream` raises.
	with open (path, 'rb', 512 * 1024) as stream :
		self.load_from_stream (stream)
def store (self) :
	"""Pickle the four document collections to the stream at `self.data_path`."""
	transcripts.printf_information ('Storing data to stream [%s]...', self.data_path)
	stream = streams.open (self.data_path, 'o')
	if stream is None :
		transcripts.printf_warning ('Stream does not exist; aborting.')
		# BUG FIX: the original fell through here and crashed calling
		# pickle.dump on None; the sibling `load` method already returns.
		return
	try :
		pickle.dump (self.document_identifiers, stream)
		pickle.dump (self.document_titles, stream)
		pickle.dump (self.document_contents, stream)
		pickle.dump (self.document_vectors, stream)
	finally :
		# close even if a dump fails, so the stream is not leaked
		stream.close ()
def load (self) :
	"""Unpickle the four document collections from the stream at `self.data_path`.

	Aborts quietly (with a warning) when the stream cannot be opened.
	"""
	transcripts.printf_information ('Loading data from stream [%s]...', self.data_path)
	stream = streams.open (self.data_path, 'i')
	if stream is None :
		transcripts.printf_warning ('Stream does not exist; aborting.')
		return
	# the attributes must be read back in exactly the order `store` wrote them
	for attribute in ('document_identifiers', 'document_titles', 'document_contents', 'document_vectors') :
		setattr (self, attribute, pickle.load (stream))
	stream.close ()
def execute_training (self, pattern) :
	"""Validate `pattern` and run one training step via `execute_training_1`.

	The first pattern fixes the expected vector length; later patterns must
	match it.  Near-zero patterns are ignored with a warning.

	Raises:
		Exception: if `pattern` differs in length from earlier patterns.
	"""
	vector_length = len (pattern)
	if self.vector_length is None :
		self.vector_length = vector_length
	elif self.vector_length != vector_length :
		# keep raising plain `Exception` (callers may catch exactly that),
		# but include a diagnostic instead of the original bare `Exception ()`
		raise Exception ('pattern vector length mismatch: expected [%d], got [%d]' % (self.vector_length, vector_length))
	if vectors.magnitude (pattern) <= 0.000000001 :
		transcripts.printf_warning ('Pattern vector is almost 0; ignoring.')
		return
	self.training_stepper.increment ()
	self.execute_training_1 (pattern)
def dump(map, dumper):
	"""Format one parsed article held in `map` and hand it to `dumper`.

	`map` maps field names to lists of raw values; "id" is required.  Missing
	"topic" or "title" fall back to placeholders (with a logged warning /
	error); a missing "content" aborts the dump entirely.
	"""
	# renamed the local from `id` to avoid shadowing the builtin
	# (parameter names `map`/`dumper` are part of the interface and kept)
	article_id = map["id"][0]
	if "topic" in map:
		topic = map["topic"][0]
	else:
		transcripts.printf_warning("Missing topic(s) from article with id [%s]; assuming unknown.", article_id)
		topic = "{unknown}"
	if "title" in map:
		title = map["title"][0]
	else:
		transcripts.printf_error("Missing title from article with id [%s]; assuming id.", article_id)
		title = "(%s)" % article_id
	if "content" in map:
		content = "\n".join(map["content"])
	else:
		transcripts.printf_error("Missing content from article with id [%s]; ignoring.", article_id)
		return
	title = "[%s] %s" % (topic, title)
	dumper.dump(title, content)