def test_with_two_characters(self):
    '''
    Test a character classifier on only the letters A and B.

    The A and B example directories are copied into a scratch "test"
    directory below ../../character_examples. The scratch directory is
    removed when the test ends, also when the test fails. The test also
    checks that a classifier rebuilt from its string form gives the same
    test result as the classifier it was created from.

    Raises AssertionError if the rebuilt classifier scores differently.
    '''
    base_dir = File("../../character_examples")
    test_dir = File(base_dir, "test")
    try:
        for letter in ("A", "B"):
            shutil.copytree(File(base_dir, letter).getPath(),
                            File(test_dir, letter).getPath())
        extracor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                               size_classification_factor=1.3)
        # Extract features
        training_examples, test_examples = \
            extracor.extract_training_and_test_examples(
                test_dir.getPath(), 90, 10)
        classifier = CharacterClassifier(training_examples,
                                         nr_of_hmms_to_try=1,
                                         fraction_of_examples_for_test=0.3,
                                         feature_extractor=extracor,
                                         train_with_examples=False)
        before = classifier.test(test_examples)
        # Test serialization: a round trip through to_string() must not
        # change the test result.
        classifier_string = classifier.to_string()
        reborn_classifier = CharacterClassifier(
            from_string_string=classifier_string)
        reborn_classifier_test_result = reborn_classifier.test(test_examples)
        if not (reborn_classifier_test_result == before):
            # A real exception class is needed here; a plain string cannot
            # be raised.
            raise AssertionError("Something is wrong with the test result")
        classifier.train()
        after = classifier.test(test_examples)
        print("test_with_two_characters", "before", before, "after", after)
    finally:
        # Best-effort cleanup so a failing run does not leave the scratch
        # directory behind and break the next run's copytree().
        shutil.rmtree(test_dir.getPath(), ignore_errors=True)
def test_init_method_different_parameters(self):
    '''
    Print a table of average test scores for a grid of extractor parameters.

    For every size_classification_factor produced by drange(0.7, 6.0, 0.3)
    one row is printed. The row starts with the factor and holds one cell
    for each number of segments in 4..12. A cell is the mean of 10 test
    scores, each from a freshly extracted example set and a classifier
    built with count based initialisation. Cells are separated by '&' and
    the row ends with two backslashes (looks like LaTeX tabular syntax).

    The whole row is written with one print call. The text is the same as
    the earlier cell-by-cell print statements produced, but the call is
    valid on both Python 2 and Python 3.
    '''
    test_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 90
    nr_of_test_examples = 10
    nr_of_repetitions = 10
    for size_classification_factor in drange(0.7, 6.0, 0.3):
        cells = []
        for nr_of_segs in range(4, 13):
            test_scores = []
            for _ in range(nr_of_repetitions):
                extracor = SimpleImageFeatureExtractor(
                    nr_of_divisions=nr_of_segs,
                    size_classification_factor=size_classification_factor)
                training_examples, test_examples = \
                    extracor.extract_training_and_test_examples(
                        test_dir,
                        nr_of_training_examples,
                        nr_of_test_examples)
                classifier = CharacterClassifier(
                    training_examples,
                    nr_of_hmms_to_try=1,
                    fraction_of_examples_for_test=0,
                    train_with_examples=False,
                    initialisation_method=SpecializedHMM.InitMethod.count_based)
                test_scores.append(classifier.test(test_examples))
            # NOTE(review): assumes classifier.test() returns a float; with
            # integer scores this division would truncate on Python 2.
            score = sum(test_scores) / len(test_scores)
            cells.append(' $' + str(score) + '$ ')
        # ' & ' and the space before the row terminator reproduce the
        # separators that the trailing-comma print statements inserted.
        print(str(size_classification_factor) + ' & ' +
              ' & '.join(cells) + ' ' + '\\\\')
def test_init_method(self,
                     nr_of_segments=7,
                     size_classification_factor=1.3,
                     only_count_based_init=False):
    '''
    Run test_init_method_with_classifier for character classifiers.

    Extracts 90 training and 10 test examples per character from
    ../../character_examples and hands test_init_method_with_classifier
    two callbacks: one that supplies the training examples and one that
    builds a CharacterClassifier for a given initialisation method. The
    only training set size requested is 90.

    nr_of_segments and size_classification_factor configure the feature
    extractor; only_count_based_init is passed through unchanged.
    '''
    test_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 90
    nr_of_test_examples = 10
    extracor = SimpleImageFeatureExtractor(
        nr_of_divisions=nr_of_segments,
        size_classification_factor=size_classification_factor)
    training_examples, test_examples = \
        extracor.extract_training_and_test_examples(test_dir,
                                                    nr_of_training_examples,
                                                    nr_of_test_examples)

    def get_examples(nr_of_examples):
        # Only the full extracted training set is available.
        if nr_of_examples != 90:
            # A real exception class is needed here; a plain string cannot
            # be raised.
            raise ValueError("Illegal amount of examples")
        return training_examples

    def get_character_classifier_with_init_method(traing_examples,
                                                  init_method):
        return CharacterClassifier(traing_examples,
                                   nr_of_hmms_to_try=1,
                                   fraction_of_examples_for_test=0,
                                   train_with_examples=False,
                                   initialisation_method=init_method)

    self.test_init_method_with_classifier(
        get_examples,
        get_character_classifier_with_init_method,
        test_examples,
        [90],
        only_count_based_init=only_count_based_init)
def test_with_two_characters(self):
    '''
    Test a character classifier on only the letters A and B.

    The A and B example directories are copied into a scratch "test"
    directory below ../../character_examples. The scratch directory is
    removed when the test ends, also when the test fails. The test also
    checks that a classifier rebuilt from its string form gives the same
    test result as the classifier it was created from.

    Raises AssertionError if the rebuilt classifier scores differently.
    '''
    base_dir = File("../../character_examples")
    test_dir = File(base_dir, "test")
    try:
        for letter in ("A", "B"):
            shutil.copytree(File(base_dir, letter).getPath(),
                            File(test_dir, letter).getPath())
        extracor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                               size_classification_factor=1.3)
        # Extract features
        training_examples, test_examples = \
            extracor.extract_training_and_test_examples(
                test_dir.getPath(), 90, 10)
        classifier = CharacterClassifier(training_examples,
                                         nr_of_hmms_to_try=1,
                                         fraction_of_examples_for_test=0.3,
                                         feature_extractor=extracor,
                                         train_with_examples=False)
        before = classifier.test(test_examples)
        # Test serialization: a round trip through to_string() must not
        # change the test result.
        classifier_string = classifier.to_string()
        reborn_classifier = CharacterClassifier(
            from_string_string=classifier_string)
        reborn_classifier_test_result = reborn_classifier.test(test_examples)
        if not (reborn_classifier_test_result == before):
            # A real exception class is needed here; a plain string cannot
            # be raised.
            raise AssertionError("Something is wrong with the test result")
        classifier.train()
        after = classifier.test(test_examples)
        print("test_with_two_characters", "before", before, "after", after)
    finally:
        # Best-effort cleanup so a failing run does not leave the scratch
        # directory behind and break the next run's copytree().
        shutil.rmtree(test_dir.getPath(), ignore_errors=True)
def test_init_method(self,
                     nr_of_segments=7,
                     size_classification_factor=1.3,
                     only_count_based_init=False):
    '''
    Run test_init_method_with_classifier for character classifiers.

    Extracts 90 training and 10 test examples per character from
    ../../character_examples and hands test_init_method_with_classifier
    two callbacks: one that supplies the training examples and one that
    builds a CharacterClassifier for a given initialisation method. The
    only training set size requested is 90.

    nr_of_segments and size_classification_factor configure the feature
    extractor; only_count_based_init is passed through unchanged.
    '''
    test_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 90
    nr_of_test_examples = 10
    extracor = SimpleImageFeatureExtractor(
        nr_of_divisions=nr_of_segments,
        size_classification_factor=size_classification_factor)
    training_examples, test_examples = \
        extracor.extract_training_and_test_examples(test_dir,
                                                    nr_of_training_examples,
                                                    nr_of_test_examples)

    def get_examples(nr_of_examples):
        # Only the full extracted training set is available.
        if nr_of_examples != 90:
            # A real exception class is needed here; a plain string cannot
            # be raised.
            raise ValueError("Illegal amount of examples")
        return training_examples

    def get_character_classifier_with_init_method(traing_examples,
                                                  init_method):
        return CharacterClassifier(traing_examples,
                                   nr_of_hmms_to_try=1,
                                   fraction_of_examples_for_test=0,
                                   train_with_examples=False,
                                   initialisation_method=init_method)

    self.test_init_method_with_classifier(
        get_examples,
        get_character_classifier_with_init_method,
        test_examples,
        [90],
        only_count_based_init=only_count_based_init)
def create_character_classification_count_matrix():
    '''
    Build a matrix that counts how test characters are classified.

    A character classifier is created from 90 training examples per
    character, and 10 test examples per character are then classified.
    In the returned matrix M, M[i][j] is the number of test examples of
    the character with alphabet index i that were classified as the
    character with alphabet index j. Indices come from
    get_example_alphabet(), and labels are lower-cased before lookup.
    '''
    examples_path = File(File(File(".."), ".."),
                         "character_examples").getCanonicalPath()
    extracor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                           size_classification_factor=3.4)
    training_examples, test_examples = \
        extracor.extract_training_and_test_examples(
            examples_path,
            nr_of_training_examples=90,
            nr_of_test_examples=10)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    alphabet = get_example_alphabet()
    size = len(alphabet)
    classification_count_matrix = zeros(size, size)
    for label, examples in test_examples:
        actual_index = alphabet.index(label.lower())
        for example in examples:
            predicted = classifier.classify_character_string(example)
            predicted_index = alphabet.index(predicted.lower())
            classification_count_matrix[actual_index][predicted_index] += 1
    return classification_count_matrix
def test_init_method_different_parameters(self):
    '''
    Print a table of average test scores for a grid of extractor parameters.

    For every size_classification_factor produced by drange(0.7, 6.0, 0.3)
    one row is printed. The row starts with the factor and holds one cell
    for each number of segments in 4..12. A cell is the mean of 10 test
    scores, each from a freshly extracted example set and a classifier
    built with count based initialisation. Cells are separated by '&' and
    the row ends with two backslashes (looks like LaTeX tabular syntax).

    The whole row is written with one print call. The text is the same as
    the earlier cell-by-cell print statements produced, but the call is
    valid on both Python 2 and Python 3.
    '''
    test_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 90
    nr_of_test_examples = 10
    nr_of_repetitions = 10
    for size_classification_factor in drange(0.7, 6.0, 0.3):
        cells = []
        for nr_of_segs in range(4, 13):
            test_scores = []
            for _ in range(nr_of_repetitions):
                extracor = SimpleImageFeatureExtractor(
                    nr_of_divisions=nr_of_segs,
                    size_classification_factor=size_classification_factor)
                training_examples, test_examples = \
                    extracor.extract_training_and_test_examples(
                        test_dir,
                        nr_of_training_examples,
                        nr_of_test_examples)
                classifier = CharacterClassifier(
                    training_examples,
                    nr_of_hmms_to_try=1,
                    fraction_of_examples_for_test=0,
                    train_with_examples=False,
                    initialisation_method=SpecializedHMM.InitMethod.count_based)
                test_scores.append(classifier.test(test_examples))
            # NOTE(review): assumes classifier.test() returns a float; with
            # integer scores this division would truncate on Python 2.
            score = sum(test_scores) / len(test_scores)
            cells.append(' $' + str(score) + '$ ')
        # ' & ' and the space before the row terminator reproduce the
        # separators that the trailing-comma print statements inserted.
        print(str(size_classification_factor) + ' & ' +
              ' & '.join(cells) + ' ' + '\\\\')
def create_character_classification_count_matrix():
    '''
    Build a matrix that counts how test characters are classified.

    A character classifier is created from 90 training examples per
    character, and 10 test examples per character are then classified.
    In the returned matrix M, M[i][j] is the number of test examples of
    the character with alphabet index i that were classified as the
    character with alphabet index j. Indices come from
    get_example_alphabet(), and labels are lower-cased before lookup.
    '''
    examples_path = File(File(File(".."), ".."),
                         "character_examples").getCanonicalPath()
    extracor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                           size_classification_factor=3.4)
    training_examples, test_examples = \
        extracor.extract_training_and_test_examples(
            examples_path,
            nr_of_training_examples=90,
            nr_of_test_examples=10)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    alphabet = get_example_alphabet()
    size = len(alphabet)
    classification_count_matrix = zeros(size, size)
    for label, examples in test_examples:
        actual_index = alphabet.index(label.lower())
        for example in examples:
            predicted = classifier.classify_character_string(example)
            predicted_index = alphabet.index(predicted.lower())
            classification_count_matrix[actual_index][predicted_index] += 1
    return classification_count_matrix
def extract_test_examples_to_file(self):
    '''
    Extract test examples and pickle them to a file in the working directory.

    Ten test examples (and no training examples) are extracted per label
    from the "word_examples_for_test" directory, located two levels above
    the directory of this source file. The result is pickled to
    datatest_segments_<segments>_cf_<factor>.pkl, where <segments> and
    <factor> are the extractor settings stored on self, with the '.' in
    the factor replaced by '_'.
    '''
    extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=self.feature_extraction_number_of_segments,
        size_classification_factor=self.feature_extraction_classification_factor)
    # Resolve the examples directory relative to this source file, not to
    # the current working directory.
    this_file = File(str(inspect.getfile(inspect.currentframe())))
    examples_dir = File(File(File(this_file.getParent(), ".."), ".."),
                        "word_examples_for_test").getCanonicalPath()
    # No training examples are requested, so the first element is unused.
    _no_training_examples, character_test_examples = \
        extractor.extract_training_and_test_examples(
            examples_dir,
            nr_of_training_examples=0,
            nr_of_test_examples=10)
    output_name = ('datatest_segments_' +
                   str(self.feature_extraction_number_of_segments) +
                   '_cf_' +
                   str(self.feature_extraction_classification_factor).replace('.', '_') +
                   '.pkl')
    output = open(output_name, 'wb')
    try:
        pickle.dump(character_test_examples, output)
    finally:
        # Close the file even if pickling fails.
        output.close()
def __init__(self,
             characters_with_examples=None,
             nr_of_hmms_to_try=3,
             fraction_of_examples_for_test=0.1,
             train_with_examples=True,
             initialisation_method=SpecializedHMM.InitMethod.count_based,
             feature_extractor=None,
             from_string_string=None):
    '''
    See WordClassifier

    Either from_string_string (a string produced by to_string()) or
    characters_with_examples (a sequence of (label, examples) pairs) must
    be given. When from_string_string is given, all other arguments are
    ignored and the feature extractor is rebuilt from the stored
    parameters.

    Raises ValueError if neither from_string_string nor any character
    examples are supplied.
    '''
    if from_string_string is not None:
        # init from string
        # SECURITY: eval() executes whatever the string contains, so only
        # load strings that come from a trusted to_string() call.
        # "\n\n"+ in the next row is for jython bug 1469
        feature_extractor_parameters, classifer_string = eval(
            "\n\n" + from_string_string)
        nr_of_divisions, size_classification_factor = \
            feature_extractor_parameters
        self.feature_extractor = SimpleImageFeatureExtractor(
            nr_of_divisions, size_classification_factor)
        self.nr_of_segments = nr_of_divisions
        super(CharacterClassifier,
              self).__init__(from_string_string=classifer_string)
        return
    if not characters_with_examples:
        raise ValueError("characters_with_examples must be given when "
                         "from_string_string is not used")
    # Feature extractor is voluntary but is necessary if the classify_image
    # method shall be used
    self.feature_extractor = feature_extractor
    # Get the number of segments created by the feature extractor
    # by looking at the length of a training example
    _first_label, first_examples = characters_with_examples[0]
    self.nr_of_segments = len(first_examples[0])
    # Each label is repeated once per segment before it is handed to the
    # WordClassifier.
    new_characters_with_examples = [
        (label * self.nr_of_segments, examples)
        for label, examples in characters_with_examples]
    super(CharacterClassifier,
          self).__init__(new_characters_with_examples,
                         nr_of_hmms_to_try,
                         fraction_of_examples_for_test,
                         train_with_examples,
                         initialisation_method,
                         alphabet=SimpleImageFeatureExtractor.feature_ids)
def create_character_classifier(save_to_file_path):
    '''
    Create a character classifier and save its string form to a file.

    The classifier is built from 100 training examples per character
    (no test examples) found in ../../character_examples, using 7
    divisions and a size classification factor of 1.3. The result of
    classifier.to_string() is written to save_to_file_path.
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0
    extractor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                            size_classification_factor=1.3)
    # No test examples are requested, so the second element is unused.
    training_examples, _test_examples = \
        extractor.extract_training_and_test_examples(example_dir,
                                                     nr_of_training_examples,
                                                     nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    # try/finally guarantees the handle is closed on a write error without
    # relying on the with statement.
    output_file = open(save_to_file_path, 'w')
    try:
        output_file.write(classifier_string)
    finally:
        output_file.close()
def create_character_classifier(save_to_file_path):
    '''
    Build a character classifier and print its prediction ratio.

    Uses 90 training and 10 test examples per character from
    ../../character_examples, 11 divisions and a size classification
    factor of 4.6. The classifier is constructed with
    train_with_examples=True and then evaluated on the test examples.

    save_to_file_path is accepted but not used by this function.
    '''
    extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                            size_classification_factor=4.6)
    examples_path = File("../../character_examples").getPath()
    training_examples, test_examples = \
        extractor.extract_training_and_test_examples(examples_path, 90, 10)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=True,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    ratio_text = str(classifier.test(test_examples))
    print('Prediction ratio:', ratio_text)
def extract_test_examples_to_file(self):
    '''
    Extract test examples and pickle them to a file in the working directory.

    Ten test examples (and no training examples) are extracted per label
    from the "word_examples_for_test" directory, located two levels above
    the directory of this source file. The result is pickled to
    datatest_segments_<segments>_cf_<factor>.pkl, where <segments> and
    <factor> are the extractor settings stored on self, with the '.' in
    the factor replaced by '_'.
    '''
    extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=self.feature_extraction_number_of_segments,
        size_classification_factor=self.feature_extraction_classification_factor)
    # Resolve the examples directory relative to this source file, not to
    # the current working directory.
    this_file = File(str(inspect.getfile(inspect.currentframe())))
    examples_dir = File(File(File(this_file.getParent(), ".."), ".."),
                        "word_examples_for_test").getCanonicalPath()
    # No training examples are requested, so the first element is unused.
    _no_training_examples, character_test_examples = \
        extractor.extract_training_and_test_examples(
            examples_dir,
            nr_of_training_examples=0,
            nr_of_test_examples=10)
    output_name = ('datatest_segments_' +
                   str(self.feature_extraction_number_of_segments) +
                   '_cf_' +
                   str(self.feature_extraction_classification_factor).replace('.', '_') +
                   '.pkl')
    output = open(output_name, 'wb')
    try:
        pickle.dump(character_test_examples, output)
    finally:
        # Close the file even if pickling fails.
        output.close()
def create_character_classifier(save_to_file_path):
    '''
    Create a character classifier and save its string form to a file.

    The classifier is built from 100 training examples per character
    (no test examples) found in ../../character_examples, using 11
    divisions and a size classification factor of 4.6. The result of
    classifier.to_string() is written to save_to_file_path + ".dat".
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0
    extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                            size_classification_factor=4.6)
    # No test examples are requested, so the second element is unused.
    training_examples, _test_examples = \
        extractor.extract_training_and_test_examples(example_dir,
                                                     nr_of_training_examples,
                                                     nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    # try/finally guarantees the handle is closed on a write error without
    # relying on the with statement.
    output_file = open(save_to_file_path + ".dat", 'w')
    try:
        output_file.write(classifier_string)
    finally:
        output_file.close()
def __init__(self,
             characters_with_examples=None,
             nr_of_hmms_to_try=3,
             fraction_of_examples_for_test=0.1,
             train_with_examples=True,
             initialisation_method=SpecializedHMM.InitMethod.count_based,
             feature_extractor=None,
             from_string_string=None):
    '''
    See WordClassifier

    Either from_string_string (a string produced by to_string()) or
    characters_with_examples (a sequence of (label, examples) pairs) must
    be given. When from_string_string is given, all other arguments are
    ignored and the feature extractor is rebuilt from the stored
    parameters.

    Raises ValueError if neither from_string_string nor any character
    examples are supplied.
    '''
    if from_string_string is not None:
        # init from string
        # SECURITY: eval() executes whatever the string contains, so only
        # load strings that come from a trusted to_string() call.
        # "\n\n"+ in the next row is for jython bug 1469
        feature_extractor_parameters, classifer_string = eval(
            "\n\n" + from_string_string)
        nr_of_divisions, size_classification_factor = \
            feature_extractor_parameters
        self.feature_extractor = SimpleImageFeatureExtractor(
            nr_of_divisions, size_classification_factor)
        self.nr_of_segments = nr_of_divisions
        super(CharacterClassifier,
              self).__init__(from_string_string=classifer_string)
        return
    if not characters_with_examples:
        raise ValueError("characters_with_examples must be given when "
                         "from_string_string is not used")
    # Feature extractor is voluntary but is necessary if the classify_image
    # method shall be used
    self.feature_extractor = feature_extractor
    # Get the number of segments created by the feature extractor
    # by looking at the length of a training example
    _first_label, first_examples = characters_with_examples[0]
    self.nr_of_segments = len(first_examples[0])
    # Each label is repeated once per segment before it is handed to the
    # WordClassifier.
    new_characters_with_examples = [
        (label * self.nr_of_segments, examples)
        for label, examples in characters_with_examples]
    super(CharacterClassifier,
          self).__init__(new_characters_with_examples,
                         nr_of_hmms_to_try,
                         fraction_of_examples_for_test,
                         train_with_examples,
                         initialisation_method,
                         alphabet=SimpleImageFeatureExtractor.feature_ids)
class CharacterClassifier(WordClassifier):
    '''
    Works as WordClassifier with some extra features for character
    classification

    Every character label is handed to the WordClassifier as the label
    repeated nr_of_segments times, and classification results are mapped
    back by taking the first element of the classified word.
    '''

    def __init__(self,
                 characters_with_examples=None,
                 nr_of_hmms_to_try=3,
                 fraction_of_examples_for_test=0.1,
                 train_with_examples=True,
                 initialisation_method=SpecializedHMM.InitMethod.count_based,
                 feature_extractor=None,
                 from_string_string=None):
        '''
        See WordClassifier

        Either from_string_string (a string produced by to_string()) or
        characters_with_examples (a sequence of (label, examples) pairs)
        must be given. When from_string_string is given, all other
        arguments are ignored and the feature extractor is rebuilt from
        the stored parameters.

        Raises ValueError if neither from_string_string nor any character
        examples are supplied.
        '''
        if from_string_string is not None:
            # init from string
            # SECURITY: eval() executes whatever the string contains, so
            # only load strings that come from a trusted to_string() call.
            # "\n\n"+ in the next row is for jython bug 1469
            feature_extractor_parameters, classifer_string = eval(
                "\n\n" + from_string_string)
            nr_of_divisions, size_classification_factor = \
                feature_extractor_parameters
            self.feature_extractor = SimpleImageFeatureExtractor(
                nr_of_divisions, size_classification_factor)
            self.nr_of_segments = nr_of_divisions
            super(CharacterClassifier,
                  self).__init__(from_string_string=classifer_string)
            return
        if not characters_with_examples:
            raise ValueError("characters_with_examples must be given when "
                             "from_string_string is not used")
        # Feature extractor is voluntary but is necessary if the
        # classify_image method shall be used
        self.feature_extractor = feature_extractor
        # Get the number of segments created by the feature extractor
        # by looking at the length of a training example
        _first_label, first_examples = characters_with_examples[0]
        self.nr_of_segments = len(first_examples[0])
        new_characters_with_examples = [
            (label * self.nr_of_segments, examples)
            for label, examples in characters_with_examples]
        super(CharacterClassifier,
              self).__init__(new_characters_with_examples,
                             nr_of_hmms_to_try,
                             fraction_of_examples_for_test,
                             train_with_examples,
                             initialisation_method,
                             alphabet=SimpleImageFeatureExtractor.feature_ids)

    def classify_character_string(self, string):
        '''
        Classify a feature string and return the first element of the
        classification given by WordClassifier.classify().
        '''
        classification = super(CharacterClassifier, self).classify(string)
        return classification[0]

    def classify_image(self, buffered_image):
        '''
        Extract a feature string from buffered_image with the feature
        extractor and classify it. Requires that a feature extractor is
        set.
        '''
        string = self.feature_extractor.extract_feature_string(buffered_image)
        return self.classify_character_string(string)

    def test(self, test_examples):
        '''
        See WordClassifier.test()

        Labels are repeated nr_of_segments times, as in __init__, before
        the examples are passed on.
        '''
        new_test_examples = [
            (label * self.nr_of_segments, examples)
            for label, examples in test_examples]
        return super(CharacterClassifier, self).test(new_test_examples)

    def to_string(self):
        '''
        Return a string that __init__ accepts as from_string_string.

        The string is the str() of a tuple holding the feature extractor
        parameters and the WordClassifier string.

        Raises ValueError if no feature extractor is set.
        '''
        if self.feature_extractor is None:
            # A real exception class is needed here; a plain string cannot
            # be raised.
            raise ValueError(
                "feature_extractor must be given if the character classifier shall be stringified")
        feature_extractor_parameters = (
            self.feature_extractor.nr_of_divisions,
            self.feature_extractor.size_classification_factor)
        word_classifier_string = super(CharacterClassifier, self).to_string()
        return str((feature_extractor_parameters, word_classifier_string))
class CharacterClassifier(WordClassifier):
    '''
    Works as WordClassifier with some extra features for character
    classification

    Every character label is handed to the WordClassifier as the label
    repeated nr_of_segments times, and classification results are mapped
    back by taking the first element of the classified word.
    '''

    def __init__(self,
                 characters_with_examples=None,
                 nr_of_hmms_to_try=3,
                 fraction_of_examples_for_test=0.1,
                 train_with_examples=True,
                 initialisation_method=SpecializedHMM.InitMethod.count_based,
                 feature_extractor=None,
                 from_string_string=None):
        '''
        See WordClassifier

        Either from_string_string (a string produced by to_string()) or
        characters_with_examples (a sequence of (label, examples) pairs)
        must be given. When from_string_string is given, all other
        arguments are ignored and the feature extractor is rebuilt from
        the stored parameters.

        Raises ValueError if neither from_string_string nor any character
        examples are supplied.
        '''
        if from_string_string is not None:
            # init from string
            # SECURITY: eval() executes whatever the string contains, so
            # only load strings that come from a trusted to_string() call.
            # "\n\n"+ in the next row is for jython bug 1469
            feature_extractor_parameters, classifer_string = eval(
                "\n\n" + from_string_string)
            nr_of_divisions, size_classification_factor = \
                feature_extractor_parameters
            self.feature_extractor = SimpleImageFeatureExtractor(
                nr_of_divisions, size_classification_factor)
            self.nr_of_segments = nr_of_divisions
            super(CharacterClassifier,
                  self).__init__(from_string_string=classifer_string)
            return
        if not characters_with_examples:
            raise ValueError("characters_with_examples must be given when "
                             "from_string_string is not used")
        # Feature extractor is voluntary but is necessary if the
        # classify_image method shall be used
        self.feature_extractor = feature_extractor
        # Get the number of segments created by the feature extractor
        # by looking at the length of a training example
        _first_label, first_examples = characters_with_examples[0]
        self.nr_of_segments = len(first_examples[0])
        new_characters_with_examples = [
            (label * self.nr_of_segments, examples)
            for label, examples in characters_with_examples]
        super(CharacterClassifier,
              self).__init__(new_characters_with_examples,
                             nr_of_hmms_to_try,
                             fraction_of_examples_for_test,
                             train_with_examples,
                             initialisation_method,
                             alphabet=SimpleImageFeatureExtractor.feature_ids)

    def classify_character_string(self, string):
        '''
        Classify a feature string and return the first element of the
        classification given by WordClassifier.classify().
        '''
        classification = super(CharacterClassifier, self).classify(string)
        return classification[0]

    def classify_image(self, buffered_image):
        '''
        Extract a feature string from buffered_image with the feature
        extractor and classify it. Requires that a feature extractor is
        set.
        '''
        string = self.feature_extractor.extract_feature_string(buffered_image)
        return self.classify_character_string(string)

    def test(self, test_examples):
        '''
        See WordClassifier.test()

        Labels are repeated nr_of_segments times, as in __init__, before
        the examples are passed on.
        '''
        new_test_examples = [
            (label * self.nr_of_segments, examples)
            for label, examples in test_examples]
        return super(CharacterClassifier, self).test(new_test_examples)

    def to_string(self):
        '''
        Return a string that __init__ accepts as from_string_string.

        The string is the str() of a tuple holding the feature extractor
        parameters and the WordClassifier string.

        Raises ValueError if no feature extractor is set.
        '''
        if self.feature_extractor is None:
            # A real exception class is needed here; a plain string cannot
            # be raised.
            raise ValueError(
                "feature_extractor must be given if the character classifier shall be stringified")
        feature_extractor_parameters = (
            self.feature_extractor.nr_of_divisions,
            self.feature_extractor.size_classification_factor)
        word_classifier_string = super(CharacterClassifier, self).to_string()
        return str((feature_extractor_parameters, word_classifier_string))