def word_list(aff: str, dic: str, base_words_only: bool = False, print_out: bool = True) -> set:
    """Return the set of distinct words produced from an affix/dictionary pair.

    The affix file is parsed first; its encoding, flag and input/output
    conversion settings are then handed to ``parse_dictionary`` to read the
    dictionary file.

    Args:
        aff: Passed to ``Affix`` to load the affix definitions.
        dic: Passed to ``parse_dictionary`` to load the dictionary entries.
        base_words_only: When true, only ``get_word()`` of each parsed
            dictionary entry is collected and no affix expansion is done.
        print_out: When true, progress messages are written to
            ``sys.stdout``; when false, nothing is printed.

    Returns:
        A set containing every collected word (duplicates removed).

    Raises:
        ValueError: If an item taken from the work queue is not a ``Word``.
    """

    def report(*args, **kwargs):
        # The silent mode used to print into a bare IOBase(); IOBase defines
        # no write(), so print() raised AttributeError as soon as
        # print_out=False was requested. Skipping the call is the safe sink.
        if print_out:
            print(*args, file=sys.stdout, **kwargs)

    report('Start parse affix file ...')
    affix = Affix(aff)
    report('Finished parsing affix file')
    report('Start parse dictionary file ...')
    dictionary = parse_dictionary(dic, affix.encoding, affix.flag, affix.iconv, affix.oconv)
    report('Finished parsing dictionary file')

    if base_words_only:
        word_set = {entry.get_word() for entry in dictionary}
    else:
        report('Start generating word list ...')
        word_set = set()
        # The parsed dictionary doubles as the work queue: entries are
        # consumed from the left and derived words are appended on the right
        # so that they get expanded in turn.
        queue = dictionary
        while queue:
            report('\rnot processed words: {:<10d}'.format(len(queue)), end='')
            word = queue.popleft()
            if not isinstance(word, Word):
                raise ValueError('Invalid Word: {} is type of {}.'.format(word, type(word)))
            word_set.add(word.get_word())
            queue.extend(_generate_affix_words(word, affix))
        report('\rnot processed words: {:<10d}'.format(len(queue)))
        report('Finished generating word list')

    report('generate Words: {:d}'.format(len(word_set)))
    report()
    return word_set
Example #2
0
 def __init__(self):
     """Set the instance's initial state.

     ``_file`` starts as a bare ``IOBase`` instance and both counters start
     at -1.
     """
     # NOTE(review): IOBase() looks like a placeholder until a real file
     # object is assigned - confirm against the rest of the class.
     self._file = IOBase()
     # NOTE(review): -1 presumably means "not known yet" - confirm.
     self.n_atom = self.n_frame = -1
 def test_can_load(self, tmpdir):
     """Check that ``CompositeLoader(FancyDict).can_load`` accepts each source kind."""
     # A file name is checked while the file exists inside the temporary
     # file structure.
     with file_structure({"file.yml": {"a": 1}}, tmpdir):
         assert CompositeLoader(FancyDict).can_load("file.yml")

     # The remaining sources need no file on disk: a URL string, a plain
     # dict and a file-like object. A fresh loader is built for every check.
     for source in ("http://www.file.com", {}, IOBase()):
         assert CompositeLoader(FancyDict).can_load(source)