def spot_keywords_in_pages(self, pages, keywords):
    """Spot *keywords* within the word images belonging to *pages*.

    Args:
        pages: Iterable of page identifiers; each page's word images are
            expected to be files named ``<page>-*.png`` inside the
            ``resized_word_images`` directory.
        keywords: Keywords, forwarded unchanged to ``spot_keywords``.

    Returns:
        The result of ``spot_keywords`` on the features extracted from the
        matching word images.
    """
    # One glob pattern per requested page.
    file_filters = [str(page) + '-*.png' for page in pages]
    # NOTE(review): stdlib fnmatch.filter takes a single pattern string;
    # passing a list here presumably relies on wcmatch.fnmatch.filter,
    # which accepts a pattern list — confirm against the file's imports.
    word_images = sorted(
        fnmatch.filter(os.listdir(self.paths["resized_word_images"]),
                       file_filters))
    validate_word_features = self.get_word_features(word_images)
    return self.spot_keywords(validate_word_features, keywords)
def train(self, train_pages, save_file_name=''):
    """Extract and store word features for the given training pages.

    Args:
        train_pages: Iterable of page identifiers; each page's word images
            are expected as ``<page>-*.png`` files inside the
            ``resized_word_images`` directory.
        save_file_name: Optional file name; when non-empty, the extracted
            features are persisted via ``save_word_features``.

    Returns:
        The list of extracted word features (also kept on
        ``self.words_features``).
    """
    # One glob pattern per training page.
    file_filters = [str(page) + '-*.png' for page in train_pages]
    # NOTE(review): passing a list of patterns presumably relies on
    # wcmatch.fnmatch.filter (stdlib takes one pattern) — confirm import.
    word_images = sorted(
        fnmatch.filter(os.listdir(self.paths["resized_word_images"]),
                       file_filters))
    self.words_features = self.get_word_features(word_images)
    # Truthiness check replaces len(save_file_name) != 0 (PEP 8 idiom);
    # identical behavior for the string parameter.
    if save_file_name:
        self.save_word_features(self.words_features, save_file_name)
    # First element of each feature tuple is presumably the word label —
    # remember every word seen during training.
    for wf in self.words_features:
        self.learned_words.add(wf[0])
    return self.words_features
def test_limit_filter(self):
    """Test expansion limit of `filter`."""
    # Callable form of assertRaises: the brace pattern '{1..11}' expands
    # to eleven alternatives, exceeding the limit of 10.
    self.assertRaises(
        _wcparse.PatternLimitException,
        fnmatch.filter,
        ['name'],
        '{1..11}',
        flags=fnmatch.BRACE,
        limit=10
    )
def test_filter(self):
    """Test exclusion with filter."""
    # 'test' matches '*' but is removed by the exclude pattern.
    result = fnmatch.filter(['name', 'test'], '*', exclude='test')
    self.assertEqual(result, ['name'])