def create_character_classification_count_matrix():
    '''
    Train a character classifier and tabulate how it labels test examples.

    Steps:

    1. Training and test examples are extracted from the
       "character_examples" directory two levels above the working
       directory (90 training and 10 test examples are requested).
    2. A CharacterClassifier is built from the training examples with
       count based initialisation and train_with_examples=False.
    3. Every test example is classified.

    Returns a matrix M where M[i][j] is the number of test examples of
    the character with alphabet index i that were classified as the
    character with alphabet index j.
    '''
    feature_extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=11,
        size_classification_factor=3.4)
    training_set, test_set = feature_extractor.extract_training_and_test_examples(
        File(File(File(".."), ".."), "character_examples").getCanonicalPath(),
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    character_classifier = CharacterClassifier(
        training_set,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    symbols = get_example_alphabet()
    counts = zeros(len(symbols), len(symbols))
    for expected_label, samples in test_set:
        # Row index: the character the examples really are.
        expected_index = symbols.index(expected_label.lower())
        for sample in samples:
            predicted_label = character_classifier.classify_character_string(sample)
            # Column index: the character the classifier chose.
            predicted_index = symbols.index(predicted_label.lower())
            row = counts[expected_index]
            row[predicted_index] = row[predicted_index] + 1
    return counts
def create_character_classification_count_matrix():
    '''
    Build a classification count matrix for the example characters.

    A character classifier is created from 90 requested training
    examples (count based initialisation, no further training) and is
    then run on the extracted test examples (10 requested).

    The result is a matrix M in which M[i][j] holds how many test
    examples of the character at alphabet index i were classified as
    the character at alphabet index j.
    '''
    image_extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                                  size_classification_factor=3.4)
    # The examples live in "character_examples", two directories up.
    examples_dir = File(File(File(".."), ".."),
                        "character_examples").getCanonicalPath()
    train_data, test_data = image_extractor.extract_training_and_test_examples(
        examples_dir,
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    char_classifier = CharacterClassifier(
        train_data,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    letters = get_example_alphabet()
    size = len(letters)
    matrix = zeros(size, size)
    for true_label, true_examples in test_data:
        actual = letters.index(true_label.lower())
        for observation in true_examples:
            guess = char_classifier.classify_character_string(observation).lower()
            guessed = letters.index(guess)
            matrix[actual][guessed] += 1
    return matrix
def __init__(self,
             words_with_examples=None,
             nr_of_hmms_to_try=3,
             fraction_of_examples_for_test=0.1,
             train_with_examples=True,
             initialisation_method=SpecializedHMM.InitMethod.count_based,
             alphabet=get_example_alphabet(),
             from_string_string=None):
    '''
    Create the classifier, either from examples or from a string.

    Parameters:
    words_with_examples - is a list of tuples where the first element
        in each tuple is a string representing a word that the
        classifier should handle and the second element is a list of
        training examples for that word.
    nr_of_hmms_to_try - creates nr_of_hmms_to_try hmms for each word and
        selects the one with highest probability for the test examples
    fraction_of_examples_for_test - fraction of the training examples
        that will be used for test. All training examples will be used
        for both test and training if it is set to 0
    train_with_examples - if training should be performed. Otherwise
        init will be done but not training
    initialisation_method - stored on the instance as
        initialisation_method
    alphabet - stored on the instance as alphabet. Note that the
        default value is evaluated once, when the method is defined,
        so instances that rely on the default share the same object.
    from_string_string - if not None, the classifier is restored from
        this string instead: it is evaluated to a pair
        (words, stringified_hmms), every stringified hmm is turned into
        a WordHMM, and all other parameters are ignored (train is not
        called).
    '''
    if from_string_string is not None:
        # Init from string.
        # SECURITY: eval executes arbitrary code, so from_string_string
        # must only ever come from a trusted source.
        # "\n\n"+ in the next row is for jython bug 1469
        words, stringified_hmms = eval("\n\n" + from_string_string)
        # A list comprehension (rather than map) guarantees a real list
        # on every Python version, so the hmms can be indexed and
        # iterated more than once.
        self.hmms_for_words = [WordHMM(from_string_string=hmm_string)
                               for hmm_string in stringified_hmms]
        self.words = words
        return
    self.words_with_examples = words_with_examples
    self.nr_of_hmms_to_try = nr_of_hmms_to_try
    self.fraction_of_examples_for_test = fraction_of_examples_for_test
    self.initialisation_method = initialisation_method
    self.alphabet = alphabet
    self.train(train_with_examples)
def create_character_classification_count_matrix_from_several_tests(nr_of_tests=10):
    '''
    Sum the classification count matrices of several independent tests.

    create_character_classification_count_matrix is called nr_of_tests
    times and the resulting matrices are added together. The test
    examples are selected randomly, so the individual runs do not
    produce the same matrix every time.

    Returns the element sum of all the matrices (a zero matrix of
    alphabet size when nr_of_tests is 0).
    '''
    size = len(get_example_alphabet())
    accumulated = zeros(size, size)
    for _ in range(nr_of_tests):
        accumulated = add_matricis(
            accumulated,
            create_character_classification_count_matrix())
    return accumulated
def create_character_classification_count_matrix_from_several_tests(
        nr_of_tests=10):
    '''
    Repeat the character classification test and add up the outcomes.

    The function create_character_classification_count_matrix is run
    nr_of_tests times, starting from an all zero matrix with one row
    and one column per character in the example alphabet. Since the
    test examples are selected randomly each run can give a different
    matrix.

    Returns the sum of all the resulting matrices.
    '''
    nr_of_characters = len(get_example_alphabet())
    total_counts = zeros(nr_of_characters, nr_of_characters)
    for _run in range(nr_of_tests):
        run_counts = create_character_classification_count_matrix()
        total_counts = add_matricis(total_counts, run_counts)
    return total_counts