Esempio n. 1
0
 def __init__(self, wiki_type, debug_flag=False):
     """Configure the reader for one supported wiki flavour.

     Args:
         wiki_type: One of 'wiki', 'simplewiki', 'wiktionary',
             'wikibooks' or 'wikiversity'. Stored on ``self.wiki_type``.
         debug_flag: Forwarded unchanged to ``CorpusReader.__init__``.

     Raises:
         AssertionError: If ``wiki_type`` is not one of the supported
             values (only while assertions are enabled).
     """
     # NOTE(review): validation relies on `assert`, which Python strips
     # under -O; kept as-is so callers see the same exception type.
     assert wiki_type in ['wiki', 'simplewiki', 'wiktionary', 'wikibooks', 'wikiversity']
     self.wiki_type = wiki_type

     # Per-flavour thresholds: (min_chars_per_line, min_words_per_section).
     thresholds_by_type = {
         'wiki': (50, 50),
         'simplewiki': (1, 1),
         'wiktionary': (1, 3),
         'wikibooks': (1, 10),
         'wikiversity': (1, 3),
     }
     chars_floor, words_floor = thresholds_by_type[self.wiki_type]

     CorpusReader.__init__(
         self,
         min_chars_per_line=chars_floor,
         min_words_per_section=words_floor,
         debug_flag=debug_flag
     )

     # Wiktionary readers are limited to the 'Noun' section list; this is
     # applied after the base initialiser has run, as in the original.
     if self.wiki_type == 'wiktionary':
         self.set_sections_to_use(['Noun'])