def begin_validation(self, classifier):
    """Begin a validation run that uses ``classifier``.

    Creates a 'Validation' stepper, stores ``classifier`` on the instance,
    calls its ``begin_classification()`` and then runs the
    ``begin_validation_1`` hook. Progress is logged before and after.
    """
    log = transcripts.printf_information
    log('Begining validation...')
    self.stepper = transcripts.Stepper('Validation')
    self.classifier = classifier
    # The classifier is started before the validation-specific hook runs.
    self.classifier.begin_classification()
    self.begin_validation_1()
    log('Began validation.')
def begin_training(self):
    """Begin a training run.

    Creates a 'Training' stepper, runs the ``begin_training_1`` hook and
    then logs the prototype count and vector length via ``printf_input``.
    """
    transcripts.printf_information('Begining training...')
    self.training_stepper = transcripts.Stepper('Training')
    self.begin_training_1()
    transcripts.printf_information('Began training.')

    # Report the state the training starts from.
    prototype_count = self.get_prototype_count()
    transcripts.printf_input('Prototypes == %d;', prototype_count)
    vector_length = self.get_vector_length()
    transcripts.printf_input('Vector length == %d;', vector_length)
def end_classification(self):
    """End the current classification run.

    Runs the ``end_classification_1`` hook, destroys the classification
    stepper and resets both the stepper and the classification cache to
    ``None``.
    """
    log = transcripts.printf_information
    log('Ending classification...')
    self.end_classification_1()
    self.classification_stepper.destroy()
    # Chained assignment binds left to right: stepper first, then cache.
    self.classification_stepper = self.classification_cache = None
    log('Ended classification.')
def store_to_file(self, path):
    """Store this object's data into the file at ``path``.

    The file is opened for binary writing with a 512 KiB buffer and handed
    to ``store_to_stream``. An existing file is overwritten after a warning
    is logged.

    Args:
        path: Filesystem path of the file to write.

    Raises:
        IOError/OSError: if the file cannot be opened for writing.
        Whatever ``store_to_stream`` raises; the file is closed regardless.
    """
    transcripts.printf_information('Storing data to file [%s]...', path)
    if os.path.exists(path):
        transcripts.printf_warning('File does exist; overwriting.')
    # ``open`` is the portable spelling of the Python 2-only ``file`` builtin.
    stream = open(path, 'wb', 512 * 1024)
    try:
        self.store_to_stream(stream)
    finally:
        # Always release the handle, even when serialization fails midway.
        stream.close()
def end_validation(self):
    """End the current validation run.

    Runs the ``end_validation_1`` hook, calls ``end_classification()`` on
    the stored classifier and drops the reference to it, then destroys the
    validation stepper and drops that reference as well.
    """
    log = transcripts.printf_information
    log('Ending validation...')
    self.end_validation_1()

    # Shut down and release the classifier first ...
    self.classifier.end_classification()
    self.classifier = None

    # ... then the stepper that tracked this validation run.
    self.stepper.destroy()
    self.stepper = None
    log('Ended validation.')
def begin_classification(self):
    """Begin a classification run.

    Resets the classification cache to an empty dictionary, creates a
    'Classification' stepper, runs the ``begin_classification_1`` hook and
    then logs the prototype count and vector length via ``printf_input``.
    """
    transcripts.printf_information('Begining classification...')
    self.classification_cache = {}
    self.classification_stepper = transcripts.Stepper('Classification')
    self.begin_classification_1()
    transcripts.printf_information('Began classification.')

    # Report the state the classification starts from.
    prototype_count = self.get_prototype_count()
    transcripts.printf_input('Prototypes == %d;', prototype_count)
    vector_length = self.get_vector_length()
    transcripts.printf_input('Vector length == %d;', vector_length)
def load_from_file(self, path):
    """Load this object's data from the file at ``path``.

    If the file does not exist a warning is logged and nothing is loaded.
    Otherwise the file is opened for binary reading with a 512 KiB buffer
    and handed to ``load_from_stream``.

    Args:
        path: Filesystem path of the file to read.

    Raises:
        IOError/OSError: if the file exists but cannot be opened.
        Whatever ``load_from_stream`` raises; the file is closed regardless.
    """
    transcripts.printf_information('Loading data from file [%s]...', path)
    if not os.path.exists(path):
        transcripts.printf_warning('File does not exist; aborting.')
        return
    # ``open`` is the portable spelling of the Python 2-only ``file`` builtin.
    stream = open(path, 'rb', 512 * 1024)
    try:
        self.load_from_stream(stream)
    finally:
        # Always release the handle, even when parsing fails midway.
        stream.close()
def handle_file(path, dumper):
    """Check the header of the file at ``path`` and pass it to ``handle_stream``.

    The first line must be exactly the ``lewis.dtd`` DOCTYPE declaration
    followed by a newline. The stream, positioned after that line, is then
    passed to ``handle_stream`` together with ``dumper``.

    Args:
        path: Filesystem path of the file to parse.
        dumper: Passed through unchanged to ``handle_stream``.

    Raises:
        IOError/OSError: if the file cannot be opened.
        Exception: if the first line is not the expected DOCTYPE line.
    """
    transcripts.printf_information("Parsing file [%s]...", path)
    # ``open`` is the portable spelling of the Python 2-only ``file`` builtin.
    stream = open(path, "rt")
    try:
        first_line = stream.readline()
        if first_line != '<!DOCTYPE lewis SYSTEM "lewis.dtd">\n':
            raise Exception(
                "File [%s] does not start with the expected DOCTYPE line."
                % (path,))
        handle_stream(stream, dumper)
    finally:
        # Close on every path, including the header-mismatch error above.
        stream.close()
def end_training(self):
    """End the current training run.

    Runs the ``end_training_1`` hook, destroys the training stepper, drops
    the reference to it and sets ``executed_training`` to ``True``. The
    prototype count and vector length are then logged via ``printf_output``.
    """
    transcripts.printf_information('Ending training...')
    self.end_training_1()
    self.training_stepper.destroy()
    self.training_stepper = None
    self.executed_training = True
    transcripts.printf_information('Ended training.')

    # Report the state the training ended with.
    prototype_count = self.get_prototype_count()
    transcripts.printf_output('Prototypes == %d;', prototype_count)
    vector_length = self.get_vector_length()
    transcripts.printf_output('Vector length == %d;', vector_length)
def store(self):
    """Pickle the document collections to the stream at ``self.data_path``.

    Writes, in this order, ``document_identifiers``, ``document_titles``,
    ``document_contents`` and ``document_vectors``. If ``streams.open``
    returns ``None`` a warning is logged and nothing is written.

    Raises:
        Whatever ``pickle.dump`` raises; the stream is closed regardless.
    """
    transcripts.printf_information(
        'Storing data to stream [%s]...', self.data_path)
    stream = streams.open(self.data_path, 'o')
    if stream is None:
        transcripts.printf_warning('Stream does not exist; aborting.')
        # Actually abort: dumping into ``None`` would only raise further down.
        return
    try:
        # The order here must stay in step with the order used when reading.
        pickle.dump(self.document_identifiers, stream)
        pickle.dump(self.document_titles, stream)
        pickle.dump(self.document_contents, stream)
        pickle.dump(self.document_vectors, stream)
    finally:
        stream.close()
def load_content(path):
    """Extract the content of a PDF or PostScript file.

    Files ending in ``.pdf`` are passed to ``/usr/bin/pdftotext`` and files
    ending in ``.ps`` to ``/usr/bin/pstotext``, both through
    ``execute_load_content``. The extension check is case-insensitive. Any
    other extension is logged as an error.

    Args:
        path: Filesystem path of the file to convert.

    Returns:
        The result of ``execute_load_content`` for a known extension,
        otherwise ``None``.

    Raises:
        IOError/OSError: if the file cannot be opened for reading.
    """
    transcripts.printf_information('Parsing file [%s]...', path)
    # Open and close at once: this keeps the early failure for missing or
    # unreadable files without holding a handle that nothing reads from.
    open(path, 'rt').close()
    lowered = path.lower()
    if lowered.endswith('.pdf'):
        return execute_load_content(['/usr/bin/pdftotext', path, '-'])
    if lowered.endswith('.ps'):
        return execute_load_content(['/usr/bin/pstotext', path])
    transcripts.printf_error('File extension is unknown; ignoring.')
    return None
def load(self):
    """Unpickle the document collections from the stream at ``self.data_path``.

    Reads, in this order, ``document_identifiers``, ``document_titles``,
    ``document_contents`` and ``document_vectors``. If ``streams.open``
    returns ``None`` a warning is logged and nothing is loaded.

    Raises:
        Whatever ``pickle.load`` raises; the stream is closed regardless.
    """
    transcripts.printf_information(
        'Loading data from stream [%s]...', self.data_path)
    stream = streams.open(self.data_path, 'i')
    if stream is None:
        transcripts.printf_warning('Stream does not exist; aborting.')
        return
    try:
        # NOTE(review): unpickling can execute arbitrary code; confirm that
        # ``self.data_path`` only ever points at trusted data.
        self.document_identifiers = pickle.load(stream)
        self.document_titles = pickle.load(stream)
        self.document_contents = pickle.load(stream)
        self.document_vectors = pickle.load(stream)
    finally:
        stream.close()
def load_from_stream(self, stream):
    """Read prototypes from ``stream`` and install them on the instance.

    The prototypes come from ``vectors.input_``. When there is at least one,
    every prototype must have the same length as the first; that length
    becomes the vector length. With no prototypes the vector length is
    ``None``. ``pre_load`` runs before the attributes are assigned and
    ``post_load`` afterwards.

    Raises:
        Exception: if the prototypes do not all have the same length.
    """
    prototypes = vectors.input_(stream)

    vector_length = None
    if len(prototypes) > 0:
        vector_length = len(prototypes[0])
        # Stop at the first prototype whose length disagrees with the first.
        if any(len(prototype) != vector_length for prototype in prototypes):
            raise Exception()

    self.pre_load(prototypes, vector_length)
    self.prototypes = prototypes
    self.vector_length = vector_length
    self.post_load()

    transcripts.printf_information('Loaded from stream.')
    prototype_count = self.get_prototype_count()
    transcripts.printf_output('Prototypes == %d;', prototype_count)
    reported_length = self.get_vector_length()
    transcripts.printf_output('Vector length == %d;', reported_length)