def __init__(self, wiki_type, debug_flag=False):
    """Configure the reader for one of the supported wiki dump types.

    Args:
        wiki_type: One of 'wiki', 'simplewiki', 'wiktionary', 'wikibooks'
            or 'wikiversity'. Stored on ``self.wiki_type`` and used to pick
            the ``min_chars_per_line`` / ``min_words_per_section`` values
            handed to ``CorpusReader.__init__``.
        debug_flag: Forwarded unchanged to ``CorpusReader.__init__``.

    Raises:
        AssertionError: If ``wiki_type`` is not one of the supported values.
    """
    # wiki_type -> (min_chars_per_line, min_words_per_section)
    thresholds = {
        'wiki': (50, 50),
        'simplewiki': (1, 1),
        'wiktionary': (1, 3),
        'wikibooks': (1, 10),
        'wikiversity': (1, 3),
    }
    # Membership is tested against a tuple so that unhashable arguments are
    # rejected the same way as any other unsupported value.
    supported = tuple(thresholds)
    if wiki_type not in supported:
        # Raised explicitly (rather than via an `assert` statement) so the
        # check still runs under `python -O`, while keeping the exception
        # type callers have always seen for a bad wiki_type.
        raise AssertionError(
            'Unsupported wiki_type %r; expected one of: %s'
            % (wiki_type, ', '.join(supported))
        )

    self.wiki_type = wiki_type
    min_chars_per_line, min_words_per_section = thresholds[wiki_type]
    CorpusReader.__init__(
        self,
        min_chars_per_line=min_chars_per_line,
        min_words_per_section=min_words_per_section,
        debug_flag=debug_flag,
    )

    # Wiktionary readers are additionally limited to the 'Noun' sections.
    if self.wiki_type == 'wiktionary':
        self.set_sections_to_use(['Noun'])