Python TesseractTrainer.clean 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: lib.tesseract_training

클래스/타입: TesseractTrainer

메소드/함수: clean

hotexamples.com에서의 예제들: 2

Python TesseractTrainer.clean - 예제 2개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python lib.tesseract_training.TesseractTrainer.clean의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

clean(2)

_clustering(1)

_combine_data(1)

_compute_character_set(1)

_generate_boxfile(1)

_normalize(1)

_rename_files(1)

_train_on_boxfile(1)

add_trained_data(1)

training(1)

예제 #1

파일 보기

파일: test_tesseract_trainer.py 프로젝트: bakerma07/TesseractTrainer

class TesseractTrainerTest(unittest.TestCase):
    """ Step-by-step tests of the TesseractTrainer training pipeline.

    The tests share one trainer (built in setUpClass) and are numbered
    test1..test8 so that unittest's alphabetical ordering runs them in
    pipeline order; each test relies on the files left behind by the
    previous ones.
    """

    GENERATED_DURING_TRAINING = ['unicharset', 'pffmtable', 'Microfeat', 'inttemp', 'normproto', 'mfunicharset']

    # The only generated file that keeps its original name (it is not renamed
    # to '<dictionary_name>.<filename>').
    NOT_RENAMED = 'mfunicharset'

    @classmethod
    def setUpClass(cls):
        """ Setup a trainer with defaults arguments """
        cls.trainer = TesseractTrainer(dictionary_name='test',
                                       text='text',
                                       font_name='helveticanarrow',
                                       font_path='./font/Helvetica-Narrow.otf',
                                       font_size=df.FONT_SIZE,
                                       exp_number=df.EXP_NUMBER,
                                       font_properties=df.FONT_PROPERTIES,
                                       tessdata_path=df.TESSDATA_PATH,
                                       word_list=df.WORD_LIST,
                                       verbose=False)
        cls.prefix = '%s.%s.exp%d' % (cls.trainer.dictionary_name, cls.trainer.font_name, cls.trainer.exp_number)

    @classmethod
    def tearDownClass(cls):
        """ Delete all temporary files created during tests """
        leftovers = glob.glob(cls.trainer.dictionary_name + '.' + cls.trainer.font_name + '*')
        for leftover in leftovers:
            os.remove(leftover)
        # The traineddata file only exists if the combine step succeeded; do
        # not let a missing file turn cleanup into an extra error.
        traineddata = cls.trainer.dictionary_name + '.traineddata'
        if os.path.exists(traineddata):
            os.remove(traineddata)

    def assertFileExists(self, f):
        """ Raise an AssertionError unless the file `f` can be opened for reading. """
        try:
            # Use a context manager so the probe does not leak a file handle.
            with open(f):
                pass
        except IOError:
            raise AssertionError('The file %s does not exist.' % (f))

    def assertFileDoesNotExist(self, f):
        """ Raise an AssertionError if the file `f` can be opened for reading. """
        try:
            with open(f):
                pass
        except IOError:
            pass
        else:
            raise AssertionError('The file %s does exist.' % (f))

    def test1_generate_boxfile(self):
        """ Test if the tif and box files are correctly created after executing the self.trainer._generate_boxfile() method. """
        self.trainer._generate_boxfile()
        self.assertFileExists(self.prefix + '.tif')
        self.assertFileExists(self.prefix + '.box')

    def test2_train_on_boxfile(self):
        """ Test if the tr file is correctly created after executing the self.trainer._train_on_boxfile() method. """
        self.trainer._train_on_boxfile()
        self.assertFileExists(self.prefix + '.tr')

    def test3_compute_character_set(self):
        """ Test if the unicharset file is correctly created after executing the self.trainer._compute_character_set() method. """
        self.trainer._compute_character_set()
        self.assertFileExists('unicharset')

    def test4_clustering(self):
        """ Test if the mfunicharset, inttemp, Microfeat and pffmtable files are correctly created
            after executing the self.trainer._clustering() method.
        """
        self.trainer._clustering()
        self.assertFileExists('mfunicharset')
        self.assertFileExists('inttemp')
        self.assertFileExists('Microfeat')
        self.assertFileExists('pffmtable')

    def test5_normalize(self):
        """ Test if the normproto file is correctly created after executing the self.trainer._normalize() method. """
        self.trainer._normalize()
        self.assertFileExists('normproto')

    def test6_rename_files(self):
        """ Check if all generated files were renamed to 'self.trainer.dictionary_name'.old_name
            after executing the self.trainer._rename_files() method.
        """
        self.trainer._rename_files()
        for filename in self.GENERATED_DURING_TRAINING:
            # Compare by equality: a substring test (`in "mfunicharset"`) would
            # also match 'unicharset' and silently skip it.
            if filename != self.NOT_RENAMED:  # mfunicharset does not need to be renamed
                self.assertFileExists(self.trainer.dictionary_name + '.' + filename)
                self.assertFileDoesNotExist(filename)

    def test7_combine_data(self):
        """ Test if the traineddata file is correctly created after executing the self.trainer._combine_data() method. """
        self.trainer._combine_data()
        self.assertFileExists(self.trainer.dictionary_name + '.traineddata')

    def test8_clean(self):
        """ Test if the all generated files were removed after executing the self.trainer.clean() method. """
        self.trainer.clean()
        for filename in self.GENERATED_DURING_TRAINING:
            # Equality, not substring: 'unicharset' was renamed like the other
            # files, so its prefixed name is the one that must be gone.
            if filename != self.NOT_RENAMED:  # mfunicharset does not need to be renamed
                self.assertFileDoesNotExist(self.trainer.dictionary_name + '.' + filename)
            else:
                self.assertFileDoesNotExist(filename)

예제 #2

파일 보기

파일: __main__.py 프로젝트: bakerma07/TesseractTrainer

    parser.add_argument('--experience_number', '-e', type=int, action='store', default=df.EXP_NUMBER,
        help="The number of the training experience.")
    parser.add_argument('--font-properties', '-f', type=str, action='store', default=df.FONT_PROPERTIES,
        help="The path of a file containing font properties for a list of training fonts.")
    parser.add_argument('--font-size', '-s', type=int, action='store', default=df.FONT_SIZE,
        help="The font size of the training font, in px.")
    parser.add_argument('--tessdata-path', '-p', type=str, action='store', default=df.TESSDATA_PATH,
        help="The path of the tessdata/ directory on your filesystem.")
    parser.add_argument('--word_list', '-w', type=str, action='store', default=df.WORD_LIST,
        help="The path of a file containing a list of frequent words.")
    parser.add_argument('--verbose', '-v', action='store_true',
        help="Use this argument if you want to display the training output.")
    args = parser.parse_args()

    perform_security_checks(args)  # Check validity of args

    # Training process
    trainer = TesseractTrainer(dictionary_name=args.tesseract_lang,
                                text=args.training_text,
                                font_name=args.font_name,
                                font_path=args.font_path,
                                font_size=args.font_size,
                                exp_number=args.experience_number,
                                font_properties=args.font_properties,
                                tessdata_path=args.tessdata_path,
                                word_list=args.word_list,
                                verbose=args.verbose)
    trainer.training()  # generate a multipage tif from args.training_text, train on it and generate a traineddata file
    trainer.clean()  # remove all files generated in the training process (except the traineddata file)
    trainer.add_trained_data()  # copy the traineddata file to the tessdata/ directory