예제 #1
0
    def update_content(self, text):
        """Rebuild the children of this widget for ``text``.

        Empties this widget and its carousel, runs ``analyze`` and
        ``update_title``, adds one ``BlockColumn`` per entry of
        ``self.blocks`` to the carousel, and finally re-attaches the title
        label and the carousel to this widget.
        """
        # Drop whatever was displayed for the previous text.
        self.clear_widgets()
        self.carousel.clear_widgets()

        self.analyze(text)
        self.update_title()

        for current_block in self.blocks:
            # ``self.word`` is handed over as the ``exclude`` argument.
            related_words = DbUtil().get_words_with_block(
                current_block, exclude=self.word)
            column = BlockColumn(current_block, related_words)
            self.carousel.add_widget(column)

        self.add_widget(self.title_label)
        self.add_widget(self.carousel)
예제 #2
0
    def __init__(self,
                 string='',
                 etymology=None,
                 meaning=None,
                 compute_etymology=False):
        """Initialise the word from ``string``.

        When both ``etymology`` and ``meaning`` are truthy the word is taken
        as fully specified: one block is created per character of
        ``string``, paired by index with ``etymology``.  Otherwise the
        suffix, the blocks and the meanings are computed.

        Args:
            string: the word, e.g. a user input string.
            etymology: optional sequence indexed in parallel with ``string``.
            meaning: optional meaning of the word.
            compute_etymology: forwarded to ``compute_blocks`` when the word
                is not fully specified.
        """
        self.check_init_parameters(string, etymology, meaning)
        self.string = _u(string)  # e.g. user input string
        self.db_util = DbUtil()

        fully_specified = etymology and meaning
        if not fully_specified:
            self.compute_suffix()
            self.blocks = self.compute_blocks(compute_etymology)
            # Different meanings in English
            self.meanings = self.db_util.compute_meanings(
                self.string_without_suffix)
            self.selected_meaning = 0  # index of the selected meaning
            return

        known_blocks = []
        for position, char in enumerate(string):
            known_blocks.append(Block(char, etymology=etymology[position]))
        self.blocks = [known_blocks]
        self.meanings = [meaning]
        self.selected_meaning = 0  # the word is clearly defined
예제 #3
0
    def __init__(self, string='', ethym=None, meaning=None, compute_ethym=False):
        """Initialise the word from ``string``.

        When both ``ethym`` and ``meaning`` are truthy the word is taken as
        fully specified: one block is created per character of ``string``,
        paired with the character of ``ethym`` at the same position.
        Otherwise the suffix, the blocks and the meanings are computed.

        Args:
            string: the word, e.g. a user input string.
            ethym: optional etymology, same length as ``string``.
            meaning: optional meaning of the word.
            compute_ethym: forwarded to ``compute_blocks`` when the word is
                not fully specified.

        Raises:
            AssertionError: if ``ethym`` and ``meaning`` are both given and
                ``string`` and ``ethym`` have different lengths.
        """
        self.string = _u(string)  # e.g. user input string
        self.language = 'Korean'
        self.db_util = DbUtil()

        if ethym and meaning:
            # To the best of my knowledge a Korean word and its hanja
            # representation (when existing) have the same lengths.
            # Checked explicitly because an ``assert`` statement is removed
            # under ``python -O``; AssertionError is kept so that existing
            # callers see the same exception type.
            if len(string) != len(ethym):
                raise AssertionError(
                    'string and ethym must have the same lengths')
            self.blocks = [[
                Block(char, ethym=hanja) for char, hanja in zip(string, ethym)
            ]]
            self.meanings = [meaning]
            self.selected_meaning = 0  # the word is clearly defined

        else:
            self.compute_suffix()
            self.blocks = self.compute_blocks(compute_ethym)
            # Different meanings in English
            self.meanings = self.db_util.compute_meanings(
                self.string_without_suffix)
            self.selected_meaning = 0  # index of the selected meaning
예제 #4
0
def test_get_hanja_name():
    """Check the name ``DbUtil.get_hanja_name`` returns for the hanja 大."""
    db = DbUtil()
    hanja_name = db.get_hanja_name(u'大')
    assert u'클 대 / 큰 대' == hanja_name
예제 #5
0
def test_compute_meanings():
    """Check that the expected meaning is among the values computed for 안녕."""
    db = DbUtil()
    expected_meaning = '(평안) (public) peace (안정) stability, well'
    meanings = db.compute_meanings(u'안녕')
    assert expected_meaning in meanings.values
예제 #6
0
def test_get_hanja_meaning():
    """Check the meaning ``DbUtil.get_hanja_meaning`` returns for 大."""
    db = DbUtil()
    hanja_meaning = db.get_hanja_meaning(u'大')
    assert 'big' == hanja_meaning
예제 #7
0
 def test_get_hanja(self, hanja, expected):
     """Check that ``DbUtil.get_hanja(hanja)`` equals ``expected``."""
     actual = DbUtil().get_hanja(hanja)
     assert expected == actual
예제 #8
0
 def test_get_words_with_block_no_etymology(self):
     """A block built without etymology yields an empty word list."""
     plain_block = Block(u'하세요')
     found_words = DbUtil().get_words_with_block(plain_block)
     assert [] == found_words
예제 #9
0
 def test_get_words_with_block(self, input_str, exclude, input_str_expected):
     """Look up the words sharing the first block of ``input_str``.

     The lookup must return at least one word, and ``input_str`` must be
     among the returned words exactly when ``input_str_expected`` is true.
     """
     korean_word = KoreanWord(input_str, compute_etymology=True)
     first_block = korean_word.get_blocks_for_selected_meaning()[0]
     matches = DbUtil().get_words_with_block(first_block, exclude=exclude)
     assert len(matches) > 0
     # The first item of each returned entry is compared with the input.
     found_strings = [match[0] for match in matches]
     assert input_str_expected == (input_str in found_strings)
예제 #10
0
 def test_compute_meanings(self):
     """Check that the expected meaning is among those computed for 안녕."""
     db = DbUtil()
     expected_meaning = '(평안) (public) peace (안정) stability, well'
     meanings = db.compute_meanings(u'안녕')
     assert expected_meaning in meanings
예제 #11
0
 def test_get_hanja_meaning(self, hanja, expected):
     """Check that ``DbUtil.get_hanja_meaning(hanja)`` equals ``expected``."""
     db = DbUtil()
     actual = db.get_hanja_meaning(hanja)
     assert expected == actual
예제 #12
0
 def test_get_hanja_name(self, hanja, expected):
     """Check that ``DbUtil.get_hanja_name(hanja)`` equals ``expected``."""
     db = DbUtil()
     actual = db.get_hanja_name(hanja)
     assert expected == actual
예제 #13
0
 def test_no_db(self, monkeypatch):
     """``DbUtil()`` raises FileNotFoundError once the connection string
     has been replaced with ``'dummy'``."""
     patched_attribute = 'asian_word_analyzer.korean.db.connection_string'
     monkeypatch.setattr(patched_attribute, 'dummy')
     with pytest.raises(FileNotFoundError):
         DbUtil()
예제 #14
0
class KoreanWord(object):
    """ This class is used to manipulate Korean words.

    Instance attributes (all assigned by ``__init__``):
        string: the constructor input passed through ``_u``.
        language: always ``'Korean'``.
        db_util: the ``DbUtil`` instance used for the lookups.
        blocks: list of lists of ``Block``, one inner list per meaning.
        meanings: a one-element list when the word is given explicitly,
            otherwise whatever ``DbUtil.compute_meanings`` returns.
        selected_meaning: index of the selected meaning (initially 0).
    """

    # Known suffixes mapped to their descriptions.
    # TODO: store the suffixes in the database instead of hardcoding them here
    _SUFFIXES = {
        u'하다': u'하다 verb particule',
        u'합니다': u'formal 하다 ending',
        u'하세요': u'formal imperative form of 하다',
        u'요': u'politeness particle',
        u'님': u'honorific particle',
    }

    def __init__(self, string='', ethym=None, meaning=None, compute_ethym=False):
        """Initialise the word from ``string``.

        When both ``ethym`` and ``meaning`` are truthy the word is taken as
        fully specified: one block is created per character of ``string``,
        paired with the character of ``ethym`` at the same position.
        Otherwise the suffix, the blocks and the meanings are computed.

        Args:
            string: the word, e.g. a user input string.
            ethym: optional etymology, same length as ``string``.
            meaning: optional meaning of the word.
            compute_ethym: forwarded to ``compute_blocks`` when the word is
                not fully specified.

        Raises:
            AssertionError: if ``ethym`` and ``meaning`` are both given and
                ``string`` and ``ethym`` have different lengths.
        """
        self.string = _u(string)  # e.g. user input string
        self.language = 'Korean'
        self.db_util = DbUtil()

        if ethym and meaning:
            # To the best of my knowledge a Korean word and its hanja
            # representation (when existing) have the same lengths.
            # Checked explicitly because an ``assert`` statement is removed
            # under ``python -O``; AssertionError is kept so that existing
            # callers see the same exception type.
            if len(string) != len(ethym):
                raise AssertionError(
                    'string and ethym must have the same lengths')
            self.blocks = [[
                Block(char, ethym=hanja) for char, hanja in zip(string, ethym)
            ]]
            self.meanings = [meaning]
            self.selected_meaning = 0  # the word is clearly defined

        else:
            self.compute_suffix()
            self.blocks = self.compute_blocks(compute_ethym)
            # Different meanings in English
            self.meanings = self.db_util.compute_meanings(
                self.string_without_suffix)
            self.selected_meaning = 0  # index of the selected meaning

    @property
    def meaning(self):
        """ Meaning getter: the entry of ``meanings`` currently selected. """
        return self.meanings[self.selected_meaning]

    def get_blocks_for_selected_meaning(self):
        """ Getter for the blocks corresponding to the selected meaning """
        return self.blocks[self.selected_meaning]

    @property
    def ethym(self):
        """ Concatenation of the non-empty ``ethym`` values of the blocks of
        the selected meaning. """
        return ''.join(
            block.ethym
            for block in self.blocks[self.selected_meaning]
            if block.ethym)

    #==========================================================================
    #  PRINT METHODS
    #==========================================================================

    def print_blocks_for_selected_meaning(self):
        """ Return the block strings for the selected meaning.

        Despite its name this method prints nothing; it returns a list.

        Example:
        --------
            For the word '안녕', the returned blocks will be ['안', '녕']
        """
        return [block.string for block in self.blocks[self.selected_meaning]]

    #==========================================================================
    #   LANGUAGE METHODS
    #==========================================================================

    def compute_suffix(self):
        """ This method computes:
        self.suffix
        self.suffix_meaning
        self.string_without_suffix

        The longest known suffix that ends ``self.string`` is selected, so
        that e.g. '안녕하세요' is split into '안녕' + '하세요' rather than
        '안녕하세' + '요'.  When no known suffix matches, ``suffix`` is ''
        and ``suffix_meaning`` is None.
        """
        detected_suffix = ''
        for suffix in self._SUFFIXES:
            # Several suffixes may match ('하세요' also ends with '요'):
            # keep the longest one instead of whichever is iterated last.
            if (self.string.endswith(suffix)
                    and len(suffix) > len(detected_suffix)):
                detected_suffix = suffix

        # Length-based slice: a negative-index slice would return '' when
        # no suffix was detected.
        stem_length = len(self.string) - len(detected_suffix)
        self.string_without_suffix = self.string[:stem_length]
        self.suffix = detected_suffix
        self.suffix_meaning = self._SUFFIXES.get(detected_suffix, None)

    def compute_blocks(self, compute_ethym=False):
        """ Compute the blocks given the input string.

        ``compute_suffix`` must have been called beforehand: this method
        reads ``string_without_suffix``, ``suffix`` and ``suffix_meaning``.

        Args:
            compute_ethym: when true, the etymology of the word is looked up
                with ``get_hanja`` and each block also receives the meaning
                and the name of its hanja.

        Output:
            Returns a list of lists of blocks, i.e.
            [ [b11, ..., b1n1], [b21, ..., b2n2], ...], where each list of
            blocks [bi1, ..., bini] corresponds to a possible meaning of the
            input string.

        Note:
            In this implementation, only one meaning is available.
        """
        if DEBUG:
            UI.render_info('compute_blocks(...) called for word ' + self.string)

        stem = self.string_without_suffix
        if not compute_ethym:
            # One block per character, spaces excluded.
            blocks = [Block(char) for char in stem if char != ' ']
        else:
            # NOTE(review): the result of get_hanja is indexed in parallel
            # with the stem; a missing or shorter result raises here --
            # confirm what get_hanja returns for unknown words.
            ethym = get_hanja(stem)
            if DEBUG:
                UI.render_info(ethym)

            blocks = [
                Block(char,
                      ethym=ethym[i],
                      meaning=self.db_util.get_hanja_meaning(ethym[i]),
                      name=self.db_util.get_hanja_name(ethym[i]))
                for i, char in enumerate(stem)
                if char != ' '
            ]

        if self.suffix:
            # The detected suffix becomes one extra trailing block.
            suffix_desc = 'Suffix: ' + self.suffix_meaning
            blocks.append(Block(self.suffix, meaning=suffix_desc))

        return [blocks]
예제 #15
0
class KoreanWord(AsianWord):
    """ This class is used to manipulate Korean words.

    Instance attributes (all assigned by ``__init__``):
        string: the constructor input passed through ``_u``.
        db_util: the ``DbUtil`` instance used for the lookups.
        blocks: list of lists of ``Block``, one inner list per meaning.
        meanings: a one-element list when the word is given explicitly,
            otherwise whatever ``DbUtil.compute_meanings`` returns.
        selected_meaning: index of the selected meaning (initially 0).
    """
    language = 'Korean'

    # Known suffixes mapped to their descriptions.
    # TODO: store the suffixes in the database instead of hardcoding them here
    _SUFFIXES = {
        u'하다': u'하다 verb particle',
        u'합니다': u'formal 하다 ending',
        u'하세요': u'formal imperative form of 하다',
        u'요': u'politeness particle',
        u'님': u'honorific particle',
    }

    def __init__(self,
                 string='',
                 etymology=None,
                 meaning=None,
                 compute_etymology=False):
        """Initialise the word from ``string``.

        When both ``etymology`` and ``meaning`` are truthy the word is taken
        as fully specified: one block is created per character of
        ``string``, paired with the character of ``etymology`` at the same
        position.  Otherwise the suffix, the blocks and the meanings are
        computed.

        Args:
            string: the word, e.g. a user input string.
            etymology: optional etymology, same length as ``string``.
            meaning: optional meaning of the word.
            compute_etymology: forwarded to ``compute_blocks`` when the word
                is not fully specified.

        Raises:
            ValueError: if ``etymology`` is given and its length differs
                from the length of ``string``.
        """
        self.check_init_parameters(string, etymology, meaning)
        self.string = _u(string)  # e.g. user input string
        self.db_util = DbUtil()

        if etymology and meaning:
            # Lengths were validated above, so zip pairs every character.
            self.blocks = [[
                Block(char, etymology=hanja)
                for char, hanja in zip(string, etymology)
            ]]
            self.meanings = [meaning]
            self.selected_meaning = 0  # the word is clearly defined

        else:
            self.compute_suffix()
            self.blocks = self.compute_blocks(compute_etymology)
            # Different meanings in English
            self.meanings = self.db_util.compute_meanings(
                self.string_without_suffix)
            self.selected_meaning = 0  # index of the selected meaning

    @staticmethod
    def check_init_parameters(string, etymology, meaning):
        """Validate the constructor arguments.

        ``meaning`` is accepted but not inspected.

        Raises:
            ValueError: if ``etymology`` is not None and its length differs
                from the length of ``string``.
        """
        if etymology is not None and len(string) != len(etymology):
            # to the best of my knowledge a Korean word and its hanja
            # representation (when existing) have the same lengths
            raise ValueError(
                'string and etymology must have the same lengths')

    def compute_suffix(self):
        """ This method computes:
        self.suffix
        self.suffix_meaning
        self.string_without_suffix

        The longest known suffix that ends ``self.string`` is selected, so
        that e.g. '안녕하세요' is split into '안녕' + '하세요' rather than
        '안녕하세' + '요'.  When no known suffix matches, ``suffix`` is ''
        and ``suffix_meaning`` is None.
        """
        detected_suffix = ''
        for suffix in self._SUFFIXES:
            # Several suffixes may match ('하세요' also ends with '요'):
            # keep the longest one instead of whichever is iterated last.
            if (self.string.endswith(suffix)
                    and len(suffix) > len(detected_suffix)):
                detected_suffix = suffix

        # Length-based slice: a negative-index slice would return '' when
        # no suffix was detected.
        stem_length = len(self.string) - len(detected_suffix)
        self.string_without_suffix = self.string[:stem_length]
        self.suffix = detected_suffix
        self.suffix_meaning = self._SUFFIXES.get(detected_suffix, None)

    def compute_blocks(self, compute_etymology=False):
        """ Compute the blocks given the input string.

        ``compute_suffix`` must have been called beforehand: this method
        reads ``string_without_suffix``, ``suffix`` and ``suffix_meaning``.

        Args:
            compute_etymology: when true, the hanja of the word is looked up
                in the database and each block also receives the meaning and
                the name of its hanja.  If no hanja is found an error is
                rendered and no block is created for the stem.

        Output:
            Returns a list of lists of blocks, i.e.
            [ [b11, ..., b1n1], [b21, ..., b2n2], ...], where each list of
            blocks [bi1, ..., bini] corresponds to a possible meaning of the
            input string.

        Note:
            In this implementation, only one meaning is available.
        """
        ui.render_debug('compute_blocks(...) called for word ' + self.string)

        stem = self.string_without_suffix
        if compute_etymology:
            etymology = self.db_util.get_hanja(stem)
            if etymology:
                ui.render_debug('Found hanja={}'.format(etymology))

                # The hanja string is indexed in parallel with the stem;
                # spaces do not get a block of their own.
                blocks = [
                    Block(char,
                          etymology=etymology[i],
                          meaning=self.db_util.get_hanja_meaning(etymology[i]),
                          name=self.db_util.get_hanja_name(etymology[i]))
                    for i, char in enumerate(stem)
                    if char != ' '
                ]
            else:
                ui.render_error(
                    'Hanja not found for {}. '
                    'Please check the spelling or populate the `Korean` table '
                    'with more data.'.format(stem))
                blocks = []
        else:
            # One block per character, spaces excluded.
            blocks = [Block(char) for char in stem if char != ' ']

        if self.suffix:
            # The detected suffix becomes one extra trailing block.
            suffix_desc = 'Suffix: ' + self.suffix_meaning
            blocks.append(Block(self.suffix, meaning=suffix_desc))

        return [blocks]