def update_content(self, text):
    """Rebuild the displayed content for ``text``.

    Empties this widget and its carousel, runs ``self.analyze(text)`` and
    ``self.update_title()``, then adds one ``BlockColumn`` per entry of
    ``self.blocks`` to the carousel. Finally the title label and the
    carousel are attached to this widget again.

    :param text: value forwarded unchanged to ``self.analyze``.
    """
    self.clear_widgets()
    self.carousel.clear_widgets()

    self.analyze(text)
    self.update_title()

    # A single database helper serves every lookup below; instantiating a
    # new DbUtil for each block repeated its set-up on every iteration.
    db_util = DbUtil()
    for block in self.blocks:
        words = db_util.get_words_with_block(block, exclude=self.word)
        self.carousel.add_widget(BlockColumn(block, words))

    self.add_widget(self.title_label)
    self.add_widget(self.carousel)
def __init__(self, string='', etymology=None, meaning=None,
             compute_etymology=False):
    """Build a word either from explicit data or by analysing ``string``.

    :param string: the word itself, e.g. the user input string.
    :param etymology: optional sequence indexed like ``string``; when it
        is given together with ``meaning`` the word is taken as fully
        defined and one block is built per character.
    :param meaning: optional meaning matching ``etymology``.
    :param compute_etymology: forwarded to ``compute_blocks`` when the
        word has to be analysed.
    """
    self.check_init_parameters(string, etymology, meaning)
    self.string = _u(string)  # e.g. user input string
    self.db_util = DbUtil()

    if not (etymology and meaning):
        # Nothing was supplied by the caller: derive everything.
        self.compute_suffix()
        self.blocks = self.compute_blocks(compute_etymology)
        # Different meanings in English
        self.meanings = self.db_util.compute_meanings(
            self.string_without_suffix)
    else:
        # The word is clearly defined: a single meaning, one block per
        # character paired with the etymology entry at the same index.
        predefined_blocks = [
            Block(char, etymology=etymology[position])
            for position, char in enumerate(string)
        ]
        self.blocks = [predefined_blocks]
        self.meanings = [meaning]

    # index of the selected meaning
    self.selected_meaning = 0
def __init__(self, string='', ethym=None, meaning=None, compute_ethym=False):
    """Build a Korean word from explicit data or by analysing ``string``.

    :param string: the word itself, e.g. the user input string.
    :param ethym: optional hanja representation; when given together
        with ``meaning`` it must have the same length as ``string``.
    :param meaning: optional meaning matching ``ethym``.
    :param compute_ethym: forwarded to ``compute_blocks`` when the word
        has to be analysed.
    :raises AssertionError: if ``ethym`` and ``meaning`` are both given
        and ``string`` and ``ethym`` differ in length.
    """
    predefined = bool(ethym and meaning)
    # To the best of my knowledge a Korean word and its hanja
    # representation (when existing) have the same lengths.
    # The check is an explicit raise rather than an ``assert`` statement so
    # that it still runs under ``python -O``; the exception type is kept.
    if predefined and len(string) != len(ethym):
        raise AssertionError('string and ethym must have the same lengths')

    self.string = _u(string)  # e.g. user input string
    self.language = 'Korean'
    self.db_util = DbUtil()

    if predefined:
        # The word is clearly defined: one block per (character, hanja).
        self.blocks = [[Block(char, ethym=hanja)
                        for char, hanja in zip(string, ethym)]]
        self.meanings = [meaning]
    else:
        self.compute_suffix()
        self.blocks = self.compute_blocks(compute_ethym)
        # Different meanings in English
        self.meanings = self.db_util.compute_meanings(
            self.string_without_suffix)

    self.selected_meaning = 0  # index of the selected meaning
def test_get_hanja_name():
    """Check the name returned by ``DbUtil.get_hanja_name`` for 大."""
    hanja_name = DbUtil().get_hanja_name(u'大')
    assert hanja_name == u'클 대 / 큰 대'
def test_compute_meanings():
    """Check that the expected meaning is among the values for 안녕."""
    expected_meaning = '(평안) (public) peace (안정) stability, well'
    meanings = DbUtil().compute_meanings(u'안녕')
    # The membership test runs against the ``values`` attribute of the result.
    assert expected_meaning in meanings.values
def test_get_hanja_meaning():
    """Check the meaning returned by ``DbUtil.get_hanja_meaning`` for 大."""
    hanja_meaning = DbUtil().get_hanja_meaning(u'大')
    assert hanja_meaning == 'big'
def test_get_hanja(self, hanja, expected):
    """Check that ``DbUtil.get_hanja(hanja)`` equals ``expected``."""
    actual = DbUtil().get_hanja(hanja)
    assert expected == actual
def test_get_words_with_block_no_etymology(self):
    """A block built without etymology must yield an empty word list."""
    bare_block = Block(u'하세요')
    related_words = DbUtil().get_words_with_block(bare_block)
    assert [] == related_words
def test_get_words_with_block(self, input_str, exclude, input_str_expected):
    """Check the words found for the first block of ``input_str``.

    The lookup must return at least one word, and ``input_str`` must be
    among the first elements of the returned entries exactly when
    ``input_str_expected`` says so.
    """
    analysed = KoreanWord(input_str, compute_etymology=True)
    first_block = analysed.get_blocks_for_selected_meaning()[0]
    related = DbUtil().get_words_with_block(first_block, exclude=exclude)

    assert len(related) > 0

    found_strings = [entry[0] for entry in related]
    assert input_str_expected == (input_str in found_strings)
def test_compute_meanings(self):
    """Check that the expected meaning is found in the result for 안녕."""
    expected_meaning = '(평안) (public) peace (안정) stability, well'
    meanings = DbUtil().compute_meanings(u'안녕')
    assert expected_meaning in meanings
def test_get_hanja_meaning(self, hanja, expected):
    """Check that ``DbUtil.get_hanja_meaning(hanja)`` equals ``expected``."""
    actual = DbUtil().get_hanja_meaning(hanja)
    assert expected == actual
def test_get_hanja_name(self, hanja, expected):
    """Check that ``DbUtil.get_hanja_name(hanja)`` equals ``expected``."""
    actual = DbUtil().get_hanja_name(hanja)
    assert expected == actual
def test_no_db(self, monkeypatch):
    """``DbUtil()`` is expected to raise when the connection string is bogus."""
    patched_attribute = 'asian_word_analyzer.korean.db.connection_string'
    monkeypatch.setattr(patched_attribute, 'dummy')

    with pytest.raises(FileNotFoundError):
        DbUtil()
class KoreanWord(object):
    """
    This class is used to manipulate Korean words.
    """

    def __init__(self, string='', ethym=None, meaning=None, compute_ethym=False):
        """Build a Korean word from explicit data or by analysing ``string``.

        :param string: the word itself, e.g. the user input string.
        :param ethym: optional hanja representation; when given together
            with ``meaning`` it must have the same length as ``string``.
        :param meaning: optional meaning matching ``ethym``.
        :param compute_ethym: forwarded to ``compute_blocks`` when the word
            has to be analysed.
        :raises AssertionError: if ``ethym`` and ``meaning`` are both given
            and ``string`` and ``ethym`` differ in length.
        """
        predefined = bool(ethym and meaning)
        # To the best of my knowledge a Korean word and its hanja
        # representation (when existing) have the same lengths.
        # The check is an explicit raise rather than an ``assert`` statement
        # so that it still runs under ``python -O``; the exception type is
        # unchanged.
        if predefined and len(string) != len(ethym):
            raise AssertionError('string and ethym must have the same lengths')

        self.string = _u(string)  # e.g. user input string
        self.language = 'Korean'
        self.db_util = DbUtil()

        if predefined:
            # The word is clearly defined: one block per (character, hanja).
            self.blocks = [[Block(char, ethym=hanja)
                            for char, hanja in zip(string, ethym)]]
            self.meanings = [meaning]
        else:
            self.compute_suffix()
            self.blocks = self.compute_blocks(compute_ethym)
            # Different meanings in English
            self.meanings = self.db_util.compute_meanings(
                self.string_without_suffix)

        self.selected_meaning = 0  # index of the selected meaning

    @property
    def meaning(self):
        """ Meaning getter: the entry of ``meanings`` currently selected. """
        return self.meanings[self.selected_meaning]

    def get_blocks_for_selected_meaning(self):
        """ Getter for the blocks corresponding to the selected meaning """
        return self.blocks[self.selected_meaning]

    @property
    def ethym(self):
        """ Concatenation of the non-empty ``ethym`` of the selected blocks. """
        return ''.join(block.ethym
                       for block in self.blocks[self.selected_meaning]
                       if block.ethym)

    # ======================================================================
    # PRINT METHODS
    # ======================================================================

    def print_blocks_for_selected_meaning(self):
        """
        Return the list of block strings for the selected meaning.

        Example:
        --------
        For the word '안녕', the returned blocks will be ['안', '녕']
        """
        return [block.string for block in self.blocks[self.selected_meaning]]

    # ======================================================================
    # LANGUAGE METHODS
    # ======================================================================

    def compute_suffix(self):
        """
        This method computes:
            self.suffix
            self.suffix_meaning
            self.string_without_suffix

        When several known suffixes match the end of ``self.string`` the
        longest one is kept, so '안녕하세요' is split as '안녕' + '하세요'
        rather than '안녕하세' + '요'. Without any match the suffix is ''
        and its meaning is None.
        """
        suffixes = {
            u'하다': u'하다 verb particule',
            u'합니다': u'formal 하다 ending',
            u'하세요': u'formal imperative form of 하다',
            u'요': u'politeness particle',
            u'님': u'honorific particle',
        }
        # TODO: store the suffixes in the database instead of hardcoding
        # them here

        # Picking the longest match makes the result independent of the
        # dictionary iteration order.
        matches = [suffix for suffix in suffixes
                   if self.string.endswith(suffix)]
        detected_suffix = max(matches, key=len) if matches else ''

        self.string_without_suffix = \
            self.string[0:len(self.string) - len(detected_suffix)]
        self.suffix = detected_suffix
        self.suffix_meaning = suffixes.get(detected_suffix, None)

    def compute_blocks(self, compute_ethym=False):
        """
        Compute the blocks given the input string.

        :param compute_ethym: when true, the hanja of the string without
            suffix is looked up with ``get_hanja`` and every block also gets
            its ethym, meaning and name.

        Output:
            Returns a list of lists of blocks, i.e.
            [ [b11, ..., b1n1], [b21, ..., b2n2], ...], where each list of
            blocks [bi1, ..., bini] corresponds to a possible meaning of the
            input string.

        Note:
            In this implementation, only one meaning is available.
        """
        if DEBUG:
            UI.render_info('compute_blocks(...) called for word ' + self.string)

        stem = self.string_without_suffix
        if not compute_ethym:
            # Spaces never become blocks.
            blocks = [Block(char) for char in stem if char != ' ']
        else:
            ethym = get_hanja(stem)
            if DEBUG:
                UI.render_info(ethym)
            # ``ethym`` is indexed with the position in ``stem`` so that
            # skipped spaces keep characters and hanja aligned.
            blocks = [Block(char,
                            ethym=ethym[index],
                            meaning=self.db_util.get_hanja_meaning(ethym[index]),
                            name=self.db_util.get_hanja_name(ethym[index]))
                      for index, char in enumerate(stem)
                      if char != ' ']

        if self.suffix:
            # The detected suffix is appended as one final block.
            suffix_desc = 'Suffix: ' + self.suffix_meaning
            blocks.append(Block(self.suffix, meaning=suffix_desc))

        return [blocks]
class KoreanWord(AsianWord):
    """
    This class is used to manipulate Korean words.
    """
    language = 'Korean'

    def __init__(self, string='', etymology=None, meaning=None,
                 compute_etymology=False):
        """Build a Korean word from explicit data or by analysing ``string``.

        :param string: the word itself, e.g. the user input string.
        :param etymology: optional hanja representation; it must have the
            same length as ``string``.
        :param meaning: optional meaning; when given together with
            ``etymology`` the word is taken as fully defined.
        :param compute_etymology: forwarded to ``compute_blocks`` when the
            word has to be analysed.
        :raises ValueError: if ``string`` and ``etymology`` differ in length.
        """
        self.check_init_parameters(string, etymology, meaning)
        self.string = _u(string)  # e.g. user input string
        self.db_util = DbUtil()

        if etymology and meaning:
            # The word is clearly defined. The lengths were validated above,
            # so the characters and hanja can be paired directly.
            self.blocks = [[
                Block(char, etymology=hanja)
                for char, hanja in zip(string, etymology)
            ]]
            self.meanings = [meaning]
        else:
            self.compute_suffix()
            self.blocks = self.compute_blocks(compute_etymology)
            # Different meanings in English
            self.meanings = self.db_util.compute_meanings(
                self.string_without_suffix)

        self.selected_meaning = 0  # index of the selected meaning

    @staticmethod
    def check_init_parameters(string, etymology, meaning):
        """Validate the constructor arguments.

        ``meaning`` is accepted but not inspected.

        :raises ValueError: if ``etymology`` is given and its length differs
            from the length of ``string``.
        """
        # To the best of my knowledge a Korean word and its hanja
        # representation (when existing) have the same lengths.
        if etymology is not None and len(string) != len(etymology):
            raise ValueError(
                'string and etymology must have the same lengths')

    def compute_suffix(self):
        """
        This method computes:
            self.suffix
            self.suffix_meaning
            self.string_without_suffix

        When several known suffixes match the end of ``self.string`` the
        longest one is kept, so '안녕하세요' is split as '안녕' + '하세요'
        rather than '안녕하세' + '요'. Without any match the suffix is ''
        and its meaning is None.
        """
        suffixes = {
            u'하다': u'하다 verb particle',
            u'합니다': u'formal 하다 ending',
            u'하세요': u'formal imperative form of 하다',
            u'요': u'politeness particle',
            u'님': u'honorific particle'
        }
        # TODO: store the suffixes in the database instead of hardcoding
        # them here

        # Picking the longest match makes the result independent of the
        # dictionary iteration order.
        matches = [suffix for suffix in suffixes
                   if self.string.endswith(suffix)]
        detected_suffix = max(matches, key=len) if matches else ''

        self.string_without_suffix = \
            self.string[0:len(self.string) - len(detected_suffix)]
        self.suffix = detected_suffix
        self.suffix_meaning = suffixes.get(detected_suffix, None)

    def compute_blocks(self, compute_etymology=False):
        """
        Compute the blocks given the input string.

        :param compute_etymology: when true, the hanja of the string without
            suffix is looked up through ``self.db_util.get_hanja``; if none
            is found an error is rendered and no character block is built.

        Output:
            Returns a list of lists of blocks, i.e.
            [ [b11, ..., b1n1], [b21, ..., b2n2], ...], where each list of
            blocks [bi1, ..., bini] corresponds to a possible meaning of the
            input string.

        Note:
            In this implementation, only one meaning is available.
        """
        ui.render_debug('compute_blocks(...) called for word ' + self.string)

        stem = self.string_without_suffix
        if compute_etymology:
            etymology = self.db_util.get_hanja(stem)
            if etymology:
                ui.render_debug('Found hanja={}'.format(etymology))
                # ``etymology`` is indexed with the position in ``stem`` so
                # that skipped spaces keep characters and hanja aligned.
                blocks = [
                    Block(char,
                          etymology=etymology[index],
                          meaning=self.db_util.get_hanja_meaning(
                              etymology[index]),
                          name=self.db_util.get_hanja_name(etymology[index]))
                    for index, char in enumerate(stem)
                    if char != ' '
                ]
            else:
                # The two sentences are separated by a space so the rendered
                # message no longer reads "...{}.Please check...".
                ui.render_error(
                    'Hanja not found for {}. '
                    'Please check the spelling or populate the `Korean` table '
                    'with more data.'.format(stem))
                blocks = []
        else:
            # Spaces never become blocks.
            blocks = [Block(char) for char in stem if char != ' ']

        if self.suffix:
            # The detected suffix is appended as one final block.
            suffix_desc = 'Suffix: ' + self.suffix_meaning
            blocks.append(Block(self.suffix, meaning=suffix_desc))

        return [blocks]