예제 #1
0
 def test_with_two_characters(self):
     '''
     Test a classifier that only knows the characters A and B.

     The example directories A and B are copied into a scratch directory
     "test" next to them. The scratch directory is removed again when the
     test ends, also when the test fails.

     Raises AssertionError if a classifier rebuilt from to_string() gives
     another test result than the classifier it was created from.
     '''
     base_dir = File("../../character_examples")
     test_dir = File(base_dir,"test")
     a_dir = File(base_dir,"A")
     b_dir = File(base_dir,"B")
     try:
         shutil.copytree(a_dir.getPath(), File(test_dir,"A").getPath())
         shutil.copytree(b_dir.getPath(), File(test_dir,"B").getPath())
         extracor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                                size_classification_factor=1.3)
         #Extract features
         training_examples, test_examples = extracor.extract_training_and_test_examples(
             test_dir.getPath(), 90, 10)
         classifier = CharacterClassifier(training_examples,
                                          nr_of_hmms_to_try=1,
                                          fraction_of_examples_for_test=0.3,
                                          feature_extractor=extracor,
                                          train_with_examples=False)
         before = classifier.test(test_examples)
         #Test serialization
         classifier_string = classifier.to_string()
         reborn_classifier = CharacterClassifier(from_string_string=classifier_string)
         reborn_classifier_test_result = reborn_classifier.test(test_examples)
         if not (reborn_classifier_test_result == before):
             #A plain string can not be raised, so use a real exception
             raise AssertionError("Something is wrong with the test result")
         classifier.train()
         after = classifier.test(test_examples)
         print("test_with_two_characters", "before", before, "after", after)
     finally:
         #Always remove the scratch directory: copytree refuses to copy into
         #an existing directory, so a left-over copy would break the next run.
         #Errors are ignored so a failed cleanup can not hide a test failure.
         shutil.rmtree(test_dir.getPath(), ignore_errors=True)
 def test_init_method_different_parameters(self):
     test_dir = File("../../character_examples").getPath()
     nr_of_training_examples = 90
     nr_of_test_examples = 10
     for size_classification_factor in drange(0.7, 6.0, 0.3):
         print str(size_classification_factor) + ' &',
         for nr_of_segs in range(4, 13):
             #print(nr_of_segs)
             test_scores = []
             for test_nr in range(10):
                 #print(test_nr)
                 extracor = SimpleImageFeatureExtractor(
                     nr_of_divisions=nr_of_segs,
                     size_classification_factor=size_classification_factor)
                 training_examples, test_examples = extracor.extract_training_and_test_examples(
                     test_dir, nr_of_training_examples, nr_of_test_examples)
                 classifier = CharacterClassifier(
                     training_examples,
                     nr_of_hmms_to_try=1,
                     fraction_of_examples_for_test=0,
                     train_with_examples=False,
                     initialisation_method=SpecializedHMM.InitMethod.
                     count_based)
                 test_scores.append(classifier.test(test_examples))
             score = sum(test_scores) / len(test_scores)
             print ' $' + str(score) + '$ ',
             if nr_of_segs == 12:
                 print '\\\\'
             else:
                 print '&',
 def test_init_method(self,
                      nr_of_segments=7,
                      size_classification_factor=1.3,
                      only_count_based_init=False):
     '''
     Run test_init_method_with_classifier for character classifiers.

     Examples are extracted once (90 training and 10 test examples) and
     handed to test_init_method_with_classifier together with a factory
     that builds an untrained CharacterClassifier for a given
     initialisation method.

     nr_of_segments -- number of divisions used by the feature extractor
     size_classification_factor -- passed on to the feature extractor
     only_count_based_init -- passed on to test_init_method_with_classifier
     '''
     test_dir = File("../../character_examples").getPath()
     nr_of_training_examples = 90
     nr_of_test_examples = 10
     extracor = SimpleImageFeatureExtractor(nr_of_divisions=nr_of_segments,
                                            size_classification_factor=size_classification_factor)
     training_examples, test_examples = extracor.extract_training_and_test_examples(test_dir,
                                                                                    nr_of_training_examples,
                                                                                    nr_of_test_examples)

     def get_examples(nr_of_examples):
         #Only the complete training set has been extracted, so no other
         #amount of examples can be delivered
         if nr_of_examples != nr_of_training_examples:
             #A plain string can not be raised, so use a real exception
             raise ValueError("Illegal amount of examples")
         return training_examples

     def get_character_classifier_with_init_method(traing_examples, init_method):
         return CharacterClassifier(traing_examples,
                                    nr_of_hmms_to_try=1,
                                    fraction_of_examples_for_test=0,
                                    train_with_examples=False,
                                    initialisation_method=init_method)

     self.test_init_method_with_classifier(get_examples,
                                           get_character_classifier_with_init_method,
                                           test_examples,
                                           [nr_of_training_examples],
                                           only_count_based_init=only_count_based_init)
예제 #4
0
 def test_with_two_characters(self):
     '''
     Test a classifier that only knows the characters A and B.

     The example directories A and B are copied into a scratch directory
     "test" next to them. The scratch directory is removed again when the
     test ends, also when the test fails.

     Raises AssertionError if a classifier rebuilt from to_string() gives
     another test result than the classifier it was created from.
     '''
     base_dir = File("../../character_examples")
     test_dir = File(base_dir, "test")
     a_dir = File(base_dir, "A")
     b_dir = File(base_dir, "B")
     try:
         shutil.copytree(a_dir.getPath(), File(test_dir, "A").getPath())
         shutil.copytree(b_dir.getPath(), File(test_dir, "B").getPath())
         extracor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                                size_classification_factor=1.3)
         #Extract features
         training_examples, test_examples = extracor.extract_training_and_test_examples(
             test_dir.getPath(), 90, 10)
         classifier = CharacterClassifier(training_examples,
                                          nr_of_hmms_to_try=1,
                                          fraction_of_examples_for_test=0.3,
                                          feature_extractor=extracor,
                                          train_with_examples=False)
         before = classifier.test(test_examples)
         #Test serialization
         classifier_string = classifier.to_string()
         reborn_classifier = CharacterClassifier(
             from_string_string=classifier_string)
         reborn_classifier_test_result = reborn_classifier.test(test_examples)
         if not (reborn_classifier_test_result == before):
             #A plain string can not be raised, so use a real exception
             raise AssertionError("Something is wrong with the test result")
         classifier.train()
         after = classifier.test(test_examples)
         print("test_with_two_characters", "before", before, "after", after)
     finally:
         #Always remove the scratch directory: copytree refuses to copy into
         #an existing directory, so a left-over copy would break the next run.
         #Errors are ignored so a failed cleanup can not hide a test failure.
         shutil.rmtree(test_dir.getPath(), ignore_errors=True)
    def test_init_method(self,
                         nr_of_segments=7,
                         size_classification_factor=1.3,
                         only_count_based_init=False):
        '''
        Run test_init_method_with_classifier for character classifiers.

        Examples are extracted once (90 training and 10 test examples) and
        handed to test_init_method_with_classifier together with a factory
        that builds an untrained CharacterClassifier for a given
        initialisation method.

        nr_of_segments -- number of divisions used by the feature extractor
        size_classification_factor -- passed on to the feature extractor
        only_count_based_init -- passed on to test_init_method_with_classifier
        '''
        test_dir = File("../../character_examples").getPath()
        nr_of_training_examples = 90
        nr_of_test_examples = 10
        extracor = SimpleImageFeatureExtractor(
            nr_of_divisions=nr_of_segments,
            size_classification_factor=size_classification_factor)
        training_examples, test_examples = extracor.extract_training_and_test_examples(
            test_dir, nr_of_training_examples, nr_of_test_examples)

        def get_examples(nr_of_examples):
            #Only the complete training set has been extracted, so no other
            #amount of examples can be delivered
            if nr_of_examples != nr_of_training_examples:
                #A plain string can not be raised, so use a real exception
                raise ValueError("Illegal amount of examples")
            return training_examples

        def get_character_classifier_with_init_method(traing_examples,
                                                      init_method):
            return CharacterClassifier(traing_examples,
                                       nr_of_hmms_to_try=1,
                                       fraction_of_examples_for_test=0,
                                       train_with_examples=False,
                                       initialisation_method=init_method)

        self.test_init_method_with_classifier(
            get_examples,
            get_character_classifier_with_init_method,
            test_examples, [nr_of_training_examples],
            only_count_based_init=only_count_based_init)
def create_character_classification_count_matrix():
    '''
    Count how the test examples of every character are classified.

    A classifier is initialised (count based, no further training) from 90
    training examples. After that the test examples of every character are
    classified (10 are requested).

    Returns a matrix M where M[i][j] is the number of test examples of the
    character with alphabet index i that were classified as the character
    with alphabet index j.
    '''
    feature_extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                                    size_classification_factor=3.4)
    examples_path = File(File(File(".."),".."),"character_examples").getCanonicalPath()
    training_examples, test_examples = feature_extractor.extract_training_and_test_examples(
        examples_path,
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    classifier = CharacterClassifier(training_examples,
                                     nr_of_hmms_to_try=1,
                                     fraction_of_examples_for_test=0,
                                     train_with_examples=False,
                                     initialisation_method=SpecializedHMM.InitMethod.count_based)
    alphabet = get_example_alphabet()
    alphabet_size = len(alphabet)
    counts = zeros(alphabet_size, alphabet_size)
    for label, examples in test_examples:
        #Row: the character the examples really show
        row = alphabet.index(label.lower())
        for example in examples:
            #Column: the character the classifier answered
            answer = classifier.classify_character_string(example).lower()
            column = alphabet.index(answer)
            counts[row][column] = counts[row][column] + 1
    return counts
 def test_init_method_different_parameters(self):
     test_dir = File("../../character_examples").getPath()
     nr_of_training_examples = 90
     nr_of_test_examples = 10
     for size_classification_factor in drange(0.7, 6.0, 0.3):
         print str(size_classification_factor) + ' &',
         for nr_of_segs in range(4,13):
             #print(nr_of_segs)
             test_scores = []
             for test_nr in range(10):
                 #print(test_nr)
                 extracor = SimpleImageFeatureExtractor(nr_of_divisions=nr_of_segs, 
                                                        size_classification_factor=size_classification_factor)
                 training_examples, test_examples = extracor.extract_training_and_test_examples(test_dir, 
                                                                                                nr_of_training_examples, 
                                                                                                nr_of_test_examples)
                 classifier = CharacterClassifier(training_examples,
                                                  nr_of_hmms_to_try=1,
                                                  fraction_of_examples_for_test=0,
                                                  train_with_examples=False,
                                                  initialisation_method=SpecializedHMM.InitMethod.count_based)
                 test_scores.append(classifier.test(test_examples))
             score = sum(test_scores) / len(test_scores)
             print ' $' + str(score) +'$ ',
             if nr_of_segs == 12:
                 print '\\\\'
             else:
                 print '&',
예제 #8
0
def create_character_classification_count_matrix():
    '''
    Count how the test examples of every character are classified.

    A classifier is initialised (count based, no further training) from 90
    training examples. After that the test examples of every character are
    classified (10 are requested).

    Returns a matrix M where M[i][j] is the number of test examples of the
    character with alphabet index i that were classified as the character
    with alphabet index j.
    '''
    feature_extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=11, size_classification_factor=3.4)
    examples_path = File(File(File(".."), ".."),
                         "character_examples").getCanonicalPath()
    training_examples, test_examples = feature_extractor.extract_training_and_test_examples(
        examples_path,
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    alphabet = get_example_alphabet()
    alphabet_size = len(alphabet)
    counts = zeros(alphabet_size, alphabet_size)
    for label, examples in test_examples:
        #Row: the character the examples really show
        row = alphabet.index(label.lower())
        for example in examples:
            #Column: the character the classifier answered
            answer = classifier.classify_character_string(example).lower()
            column = alphabet.index(answer)
            counts[row][column] = counts[row][column] + 1
    return counts
 def extract_test_examples_to_file(self):
     '''
     Extract test examples and pickle them to a file.

     The examples are read from the directory word_examples_for_test, two
     levels above the directory of this source file. The output file is
     opened with a relative name, so it ends up in the current working
     directory. Its name is built from
     self.feature_extraction_number_of_segments and
     self.feature_extraction_classification_factor (with '.' replaced
     by '_').
     '''
     extractor = SimpleImageFeatureExtractor(nr_of_divisions=self.feature_extraction_number_of_segments,
                                 size_classification_factor=self.feature_extraction_classification_factor)
     this_file = File(str(inspect.getfile(inspect.currentframe())))
     examples_dir = File(File(File(this_file.getParent(),".."),".."),"word_examples_for_test").getCanonicalPath()
     #No training examples are requested, so the first result is not used
     unused_training_examples, character_test_examples = extractor.extract_training_and_test_examples(
         examples_dir, #character_examples word_examples_for_test
         nr_of_training_examples=0,
         nr_of_test_examples=10)

     output = open('datatest_segments_' +
                   str(self.feature_extraction_number_of_segments) + '_cf_'+
                   str(self.feature_extraction_classification_factor).replace('.','_')+
                   '.pkl', 'wb')
     try:
         pickle.dump(character_test_examples, output)
     finally:
         #Close the file also when pickling fails
         output.close()
예제 #10
0
 def __init__(self,
              characters_with_examples=None,
              nr_of_hmms_to_try=3,
              fraction_of_examples_for_test=0.1,
              train_with_examples=True,
              initialisation_method=SpecializedHMM.InitMethod.count_based,
              feature_extractor=None,
              from_string_string=None):
     '''
     Create a classifier from examples or from a string made by to_string().

     characters_with_examples -- sequence of (label, examples) pairs.
         Needed unless from_string_string is given.
     nr_of_hmms_to_try, fraction_of_examples_for_test, train_with_examples,
     initialisation_method -- passed on to WordClassifier, see WordClassifier
     feature_extractor -- voluntary, but needed by classify_image
     from_string_string -- string created by to_string(). When given, all
         other parameters are ignored.

     Raises ValueError if neither characters_with_examples nor
     from_string_string is given.
     '''
     if from_string_string is not None:
         #init from string
         #NOTE(review): eval runs any code found in the string, so only
         #strings from a trusted source may be passed in
         #"\n\n"+ in the next row is for jython bug 1469
         feature_extractor_parameters, classifer_string = eval(
             "\n\n" + from_string_string)
         nr_of_divisions, size_classification_factor = feature_extractor_parameters
         self.feature_extractor = SimpleImageFeatureExtractor(
             nr_of_divisions, size_classification_factor)
         self.nr_of_segments = nr_of_divisions
         super(CharacterClassifier,
               self).__init__(from_string_string=classifer_string)
         return
     if characters_with_examples is None:
         raise ValueError("characters_with_examples or from_string_string "
                          "must be given")
     #Feature extractor is voluntary but is necessary if the classify_image
     #method shall be used
     self.feature_extractor = feature_extractor
     #Get the number of segments created by the feature extractor
     #by looking at the length of a training example
     first_label, first_examples = characters_with_examples[0]
     self.nr_of_segments = len(first_examples[0])
     #Every label is repeated once for every segment before it is handed
     #to WordClassifier
     new_characters_with_examples = [
         (label * self.nr_of_segments, examples)
         for label, examples in characters_with_examples]
     super(CharacterClassifier,
           self).__init__(new_characters_with_examples,
                          nr_of_hmms_to_try,
                          fraction_of_examples_for_test,
                          train_with_examples,
                          initialisation_method,
                          alphabet=SimpleImageFeatureExtractor.feature_ids)
예제 #11
0
def create_character_classifier(save_to_file_path):
    '''
    Create a character classifier and save its string form to a file.

    The classifier is initialised (count based, no further training) from
    100 training examples, using a feature extractor with 7 divisions and
    size classification factor 1.3.

    save_to_file_path -- path of the file that the result of
        classifier.to_string() is written to
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0

    extractor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                            size_classification_factor=1.3)

    #No test examples are requested, so the second result is not used
    training_examples, unused_test_examples = extractor.extract_training_and_test_examples(
        example_dir,
        nr_of_training_examples,
        nr_of_test_examples)
    classifier = CharacterClassifier(training_examples,
                                     nr_of_hmms_to_try=1,
                                     fraction_of_examples_for_test=0,
                                     train_with_examples=False,
                                     initialisation_method=SpecializedHMM.InitMethod.count_based,
                                     feature_extractor=extractor)
    classifier_string = classifier.to_string()
    output_file = open(save_to_file_path,'w')
    try:
        output_file.write(classifier_string)
    finally:
        #Close the file also when writing fails
        output_file.close()
def create_character_classifier(save_to_file_path):
    '''
    Train a character classifier and print its test result.

    The classifier is trained on 90 training examples and tested with the
    extracted test examples (10 are requested). The feature extractor uses
    11 divisions and size classification factor 4.6.

    save_to_file_path -- not used by this function
    '''
    examples_path = File("../../character_examples").getPath()
    training_count = 90
    test_count = 10

    feature_extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=11,
        size_classification_factor=4.6)
    training_set, test_set = feature_extractor.extract_training_and_test_examples(
        examples_path, training_count, test_count)

    trained_classifier = CharacterClassifier(
        training_set,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=True,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=feature_extractor)
    test_result = str(trained_classifier.test(test_set))
    print('Prediction ratio:', test_result)
예제 #13
0
    def extract_test_examples_to_file(self):
        '''
        Extract test examples and pickle them to a file.

        The examples are read from the directory word_examples_for_test, two
        levels above the directory of this source file. The output file is
        opened with a relative name, so it ends up in the current working
        directory. Its name is built from
        self.feature_extraction_number_of_segments and
        self.feature_extraction_classification_factor (with '.' replaced
        by '_').
        '''
        extractor = SimpleImageFeatureExtractor(
            nr_of_divisions=self.feature_extraction_number_of_segments,
            size_classification_factor=self.
            feature_extraction_classification_factor)
        this_file = File(str(inspect.getfile(inspect.currentframe())))
        examples_dir = File(
            File(File(this_file.getParent(), ".."), ".."),
            "word_examples_for_test").getCanonicalPath()
        #No training examples are requested, so the first result is not used
        unused_training_examples, character_test_examples = extractor.extract_training_and_test_examples(
            examples_dir,  #character_examples word_examples_for_test
            nr_of_training_examples=0,
            nr_of_test_examples=10)

        output = open(
            'datatest_segments_' +
            str(self.feature_extraction_number_of_segments) + '_cf_' +
            str(self.feature_extraction_classification_factor).replace(
                '.', '_') + '.pkl', 'wb')
        try:
            pickle.dump(character_test_examples, output)
        finally:
            #Close the file also when pickling fails
            output.close()
def create_character_classifier(save_to_file_path):
    '''
    Create a character classifier and save its string form to a file.

    The classifier is initialised (count based, no further training) from
    100 training examples, using a feature extractor with 11 divisions and
    size classification factor 4.6.

    save_to_file_path -- path of the output file without extension; the
        result of classifier.to_string() is written to this path + ".dat"
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0

    extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                            size_classification_factor=4.6)

    #No test examples are requested, so the second result is not used
    training_examples, unused_test_examples = extractor.extract_training_and_test_examples(
        example_dir, nr_of_training_examples, nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    output_file = open(save_to_file_path + ".dat", 'w')
    try:
        output_file.write(classifier_string)
    finally:
        #Close the file also when writing fails
        output_file.close()
예제 #15
0
 def __init__(self,
              characters_with_examples=None,
              nr_of_hmms_to_try=3,
              fraction_of_examples_for_test=0.1,
              train_with_examples=True,
              initialisation_method=SpecializedHMM.InitMethod.count_based,
              feature_extractor=None,
              from_string_string=None):
     '''
     Create a classifier from examples or from a string made by to_string().

     characters_with_examples -- sequence of (label, examples) pairs.
         Needed unless from_string_string is given.
     nr_of_hmms_to_try, fraction_of_examples_for_test, train_with_examples,
     initialisation_method -- passed on to WordClassifier, see WordClassifier
     feature_extractor -- voluntary, but needed by classify_image
     from_string_string -- string created by to_string(). When given, all
         other parameters are ignored.

     Raises ValueError if neither characters_with_examples nor
     from_string_string is given.
     '''
     if from_string_string is not None:
         #init from string
         #NOTE(review): eval runs any code found in the string, so only
         #strings from a trusted source may be passed in
         #"\n\n"+ in the next row is for jython bug 1469
         feature_extractor_parameters,classifer_string = eval("\n\n"+from_string_string)
         nr_of_divisions,size_classification_factor = feature_extractor_parameters
         self.feature_extractor = SimpleImageFeatureExtractor(nr_of_divisions,
                                                              size_classification_factor)
         self.nr_of_segments = nr_of_divisions
         super(CharacterClassifier,self).__init__(from_string_string=classifer_string)
         return
     if characters_with_examples is None:
         raise ValueError("characters_with_examples or from_string_string "
                          "must be given")
     #Feature extractor is voluntary but is necessary if the classify_image
     #method shall be used
     self.feature_extractor = feature_extractor
     #Get the number of segments created by the feature extractor
     #by looking at the length of a training example
     first_label,first_examples = characters_with_examples[0]
     self.nr_of_segments = len(first_examples[0])
     #Every label is repeated once for every segment before it is handed
     #to WordClassifier
     new_characters_with_examples = [(label*self.nr_of_segments,examples)
                                     for label,examples in characters_with_examples]
     super(CharacterClassifier,self).__init__(new_characters_with_examples,
                                              nr_of_hmms_to_try,
                                              fraction_of_examples_for_test,
                                              train_with_examples,
                                              initialisation_method,
                                              alphabet=SimpleImageFeatureExtractor.feature_ids)
예제 #16
0
class CharacterClassifier(WordClassifier):
    '''
    Works as WordClassifier with some extra features for character classification

    Every character label is repeated once for every segment of the feature
    strings before it is handed to WordClassifier. The classifier can be
    turned into a string with to_string() and be recreated from that string.
    '''
    def __init__(self,
                 characters_with_examples=None,
                 nr_of_hmms_to_try=3,
                 fraction_of_examples_for_test=0.1,
                 train_with_examples=True,
                 initialisation_method=SpecializedHMM.InitMethod.count_based,
                 feature_extractor=None,
                 from_string_string=None):
        '''
        Create a classifier from examples or from a string made by to_string().

        characters_with_examples -- sequence of (label, examples) pairs.
            Needed unless from_string_string is given.
        nr_of_hmms_to_try, fraction_of_examples_for_test,
        train_with_examples, initialisation_method -- passed on to
            WordClassifier, see WordClassifier
        feature_extractor -- voluntary, but needed by classify_image and
            to_string
        from_string_string -- string created by to_string(). When given,
            all other parameters are ignored.

        Raises ValueError if neither characters_with_examples nor
        from_string_string is given.
        '''
        if from_string_string is not None:
            #init from string
            #NOTE(review): eval runs any code found in the string, so only
            #strings from a trusted source may be passed in
            #"\n\n"+ in the next row is for jython bug 1469
            feature_extractor_parameters, classifer_string = eval(
                "\n\n" + from_string_string)
            nr_of_divisions, size_classification_factor = feature_extractor_parameters
            self.feature_extractor = SimpleImageFeatureExtractor(
                nr_of_divisions, size_classification_factor)
            self.nr_of_segments = nr_of_divisions
            super(CharacterClassifier,
                  self).__init__(from_string_string=classifer_string)
            return
        if characters_with_examples is None:
            raise ValueError("characters_with_examples or from_string_string "
                             "must be given")
        #Feature extractor is voluntary but is necessary if the classify_image
        #method shall be used
        self.feature_extractor = feature_extractor
        #Get the number of segments created by the feature extractor
        #by looking at the length of a training example
        first_label, first_examples = characters_with_examples[0]
        self.nr_of_segments = len(first_examples[0])
        new_characters_with_examples = [
            (label * self.nr_of_segments, examples)
            for label, examples in characters_with_examples]
        super(CharacterClassifier,
              self).__init__(new_characters_with_examples,
                             nr_of_hmms_to_try,
                             fraction_of_examples_for_test,
                             train_with_examples,
                             initialisation_method,
                             alphabet=SimpleImageFeatureExtractor.feature_ids)

    def classify_character_string(self, string):
        '''
        Classify a feature string and return the first element of the
        classification given by WordClassifier.classify
        '''
        #The labels were repeated once per segment in __init__, so the first
        #element is presumably the single character - verify against
        #WordClassifier.classify
        classification = super(CharacterClassifier, self).classify(string)
        return classification[0]

    def classify_image(self, buffered_image):
        '''
        Extract the feature string of the image with the feature extractor
        and classify it. A feature extractor must have been given.
        '''
        string = self.feature_extractor.extract_feature_string(buffered_image)
        return self.classify_character_string(string)

    def test(self, test_examples):
        '''
        See WordClassifier.test()

        test_examples -- sequence of (label, examples) pairs. The labels
            are repeated in the same way as in __init__ before the test.
        '''
        new_test_examples = [(label * self.nr_of_segments, examples)
                             for label, examples in test_examples]
        return super(CharacterClassifier, self).test(new_test_examples)

    def to_string(self):
        '''
        Return a string that __init__ can recreate the classifier from.

        The string is the str() of a tuple with the feature extractor
        parameters and the string form of the WordClassifier.

        Raises ValueError if the classifier has no feature extractor.
        '''
        if self.feature_extractor is None:
            #A plain string can not be raised, so use a real exception
            raise ValueError("feature_extractor must be given if the character classifier shall be stringified")
        feature_extractor_parameters = (
            self.feature_extractor.nr_of_divisions,
            self.feature_extractor.size_classification_factor)
        word_classifier_string = super(CharacterClassifier, self).to_string()
        return str((feature_extractor_parameters, word_classifier_string))
예제 #17
0
class CharacterClassifier(WordClassifier):
    '''
    Works as WordClassifier with some extra features for character classification

    Every character label is repeated once for every segment of the feature
    strings before it is handed to WordClassifier. The classifier can be
    turned into a string with to_string() and be recreated from that string.
    '''


    def __init__(self,
                 characters_with_examples=None,
                 nr_of_hmms_to_try=3,
                 fraction_of_examples_for_test=0.1,
                 train_with_examples=True,
                 initialisation_method=SpecializedHMM.InitMethod.count_based,
                 feature_extractor=None,
                 from_string_string=None):
        '''
        Create a classifier from examples or from a string made by to_string().

        characters_with_examples -- sequence of (label, examples) pairs.
            Needed unless from_string_string is given.
        nr_of_hmms_to_try, fraction_of_examples_for_test,
        train_with_examples, initialisation_method -- passed on to
            WordClassifier, see WordClassifier
        feature_extractor -- voluntary, but needed by classify_image and
            to_string
        from_string_string -- string created by to_string(). When given,
            all other parameters are ignored.

        Raises ValueError if neither characters_with_examples nor
        from_string_string is given.
        '''
        if from_string_string is not None:
            #init from string
            #NOTE(review): eval runs any code found in the string, so only
            #strings from a trusted source may be passed in
            #"\n\n"+ in the next row is for jython bug 1469
            feature_extractor_parameters,classifer_string = eval("\n\n"+from_string_string)
            nr_of_divisions,size_classification_factor = feature_extractor_parameters
            self.feature_extractor = SimpleImageFeatureExtractor(nr_of_divisions,
                                                                 size_classification_factor)
            self.nr_of_segments = nr_of_divisions
            super(CharacterClassifier,self).__init__(from_string_string=classifer_string)
            return
        if characters_with_examples is None:
            raise ValueError("characters_with_examples or from_string_string "
                             "must be given")
        #Feature extractor is voluntary but is necessary if the classify_image
        #method shall be used
        self.feature_extractor = feature_extractor
        #Get the number of segments created by the feature extractor
        #by looking at the length of a training example
        first_label,first_examples = characters_with_examples[0]
        self.nr_of_segments = len(first_examples[0])
        new_characters_with_examples = [(label*self.nr_of_segments,examples)
                                        for label,examples in characters_with_examples]
        super(CharacterClassifier,self).__init__(new_characters_with_examples,
                                                 nr_of_hmms_to_try,
                                                 fraction_of_examples_for_test,
                                                 train_with_examples,
                                                 initialisation_method,
                                                 alphabet=SimpleImageFeatureExtractor.feature_ids)

    def classify_character_string(self,string):
        '''
        Classify a feature string and return the first element of the
        classification given by WordClassifier.classify
        '''
        #The labels were repeated once per segment in __init__, so the first
        #element is presumably the single character - verify against
        #WordClassifier.classify
        classification = super(CharacterClassifier, self).classify(string)
        return classification[0]

    def classify_image(self,buffered_image):
        '''
        Extract the feature string of the image with the feature extractor
        and classify it. A feature extractor must have been given.
        '''
        string = self.feature_extractor.extract_feature_string(buffered_image)
        return self.classify_character_string(string)

    def test(self,test_examples):
        '''
        See WordClassifier.test()

        test_examples -- sequence of (label, examples) pairs. The labels
            are repeated in the same way as in __init__ before the test.
        '''
        new_test_examples = [(label * self.nr_of_segments, examples)
                             for label, examples in test_examples]
        return super(CharacterClassifier, self).test(new_test_examples)

    def to_string(self):
        '''
        Return a string that __init__ can recreate the classifier from.

        The string is the str() of a tuple with the feature extractor
        parameters and the string form of the WordClassifier.

        Raises ValueError if the classifier has no feature extractor.
        '''
        if self.feature_extractor is None:
            #A plain string can not be raised, so use a real exception
            raise ValueError("feature_extractor must be given if the character classifier shall be stringified")
        feature_extractor_parameters = (self.feature_extractor.nr_of_divisions,
                                        self.feature_extractor.size_classification_factor)
        word_classifier_string = super(CharacterClassifier,self).to_string()
        return str((feature_extractor_parameters,
                    word_classifier_string))