예제 #1
0
def generate_sets(neighbours, blur_scale, verbose=0, max_per_value=70):
    """Split the entire dataset into a learning set and a test set.

    The characters returned by load_characters() are walked in order. For
    every distinct ``char.value`` the first ``max_per_value`` characters are
    put in the learning set; each later character with that value is put in
    the test set. Both sets are then written with fdump() to
    'learning_set_<blur_scale>_<neighbours>.dat' and
    'test_set_<blur_scale>_<neighbours>.dat'.

    Arguments:
    neighbours    -- forwarded to load_characters() and used in the file names
    blur_scale    -- forwarded to load_characters() and used in the file names
    verbose       -- when truthy, progress messages and the values of both
                     sets are printed
    max_per_value -- maximum number of characters per value that end up in
                     the learning set (default 70, the original fixed limit)

    Returns a (learning_set, test_set) tuple of lists.
    """
    suffix = '_%s_%s' % (blur_scale, neighbours)
    learning_set_file = 'learning_set%s.dat' % suffix
    test_set_file = 'test_set%s.dat' % suffix

    chars = load_characters(neighbours, blur_scale, verbose=verbose)

    if verbose:
        print('Going to generate learning set and test set...')

    learning_set = []
    test_set = []

    # Number of characters already placed in the learning set, per value.
    # A dict lookup replaces rescanning a growing list for every character.
    # NOTE(review): assumes char.value is hashable (it looks like the
    # character label string) -- confirm against the Character class.
    learned = {}

    for char in chars:
        value = char.value
        seen = learned.get(value, 0)

        if seen >= max_per_value:
            test_set.append(char)
        else:
            learning_set.append(char)
            learned[value] = seen + 1

    if verbose:
        # Single-argument print calls give the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('Learning set: %s' % ([c.value for c in learning_set],))
        print('\nTest set: %s' % ([c.value for c in test_set],))
        print('\nSaving learning set...')

    fdump(learning_set, learning_set_file)

    if verbose:
        print('Saving test set...')

    fdump(test_set, test_set_file)

    return learning_set, test_set
예제 #2
0
def load_characters(neighbours, blur_scale, verbose=0):
    """Return the list of Character objects for the given settings.

    The result is cached in 'characters_<blur_scale>_<neighbours>.dat'. When
    that file exists it is read with fload() and returned as-is. Otherwise
    every image in every sub-folder of IMAGES_FOLDER is loaded as a
    GrayscaleImage, normalized with the given blur scale and
    NORMALIZED_HEIGHT, wrapped in a Character labelled with the sub-folder
    name, and the complete list is written to the cache file with fdump().

    Arguments:
    neighbours -- passed to Character.get_single_cell_feature_vector() and
                  used in the cache file name
    blur_scale -- passed as ``blur`` to NormalizedCharacterImage and used in
                  the cache file name
    verbose    -- when truthy, progress messages are printed

    Returns a list of Character objects.
    """
    chars_file = 'characters_%s_%s.dat' % (blur_scale, neighbours)

    if exists(chars_file):
        # Status output is only shown on request, like every other message
        # in this function.
        if verbose:
            print('Loading characters...')

        return fload(chars_file)

    if verbose:
        print('Going to generate character objects...')

    chars = []

    for char in sorted(listdir(IMAGES_FOLDER)):
        # IMAGES_FOLDER is concatenated directly, so it is expected to end
        # with a path separator.
        char_folder = IMAGES_FOLDER + char

        for count, image_name in enumerate(sorted(listdir(char_folder)), 1):
            image = GrayscaleImage(char_folder + '/' + image_name)
            norm = NormalizedCharacterImage(image, blur=blur_scale,
                                            height=NORMALIZED_HEIGHT)
            character = Character(char, [], norm)
            # Return value is ignored; presumably this stores the feature
            # vector on the character so it is saved along with it -- verify
            # against the Character class.
            character.get_single_cell_feature_vector(neighbours)
            chars.append(character)

            if verbose:
                print('Loaded character %s %d times' % (char, count))

    if verbose:
        print('Saving characters...')

    fdump(chars, chars_file)

    return chars