Ejemplo n.º 1
0
    def __solve_kana(self):
        """Extend each current branch past the kana character being solved.

        For every branch in the current set, the reading character at the
        branch's next reading position is converted to the same kana script
        as the current word character and compared with it. On a match, a
        child branch holding a one-character Segment is filed under the
        next word index and the usable-branch counter is incremented.
        Branches whose next reading position lies past the end of the
        reading are skipped.
        """
        for parent in self.__current_branches:
            # Position in the reading where this branch resumes.
            pos = parent.next_reading
            if pos < len(self.reading):
                candidate = self.reading[pos]

                # Bring the reading character into the word character's
                # script so hiragana and katakana spellings compare equal.
                if is_kata(self.__w_char) and is_hira(candidate):
                    candidate = hira_to_kata(candidate)
                if is_hira(self.__w_char) and is_kata(candidate):
                    candidate = kata_to_hira(candidate)

                if self.__w_char != candidate:
                    continue

                # The segment stores the reading character as written, not
                # the script-converted copy used for the comparison.
                segment = Segment(
                    None, self.__w_char, self.__w_char, 0,
                    self.reading[pos], self.__w_char)
                child = Branch(parent, segment)
                self.__branches_at[self.__w_index+1].append(child)
                self.__usable_branches += 1
Ejemplo n.º 2
0
def get_id(char, reading):
    """Return the database id of the reading of char.

    The reading may be given in either hiragana or katakana; both script
    forms are tried against the ``reading`` table, so on and kun readings
    can be found with either.

    Args:
        char: The character whose reading is looked up.
        reading: The reading, in hiragana or katakana. Which script it is
            in is decided from its first character only.

    Returns:
        The ``id`` column of the first matching row, or None when reading
        is empty or None, or when no row matches.
    """
    # Nothing to look up. Truthiness also covers None, which len() rejects.
    if not reading:
        return None

    # Rows are indexed by column name below, so the connection has to
    # produce sqlite3.Row objects.
    conn.row_factory = sqlite3.Row

    # Get both the hiragana and katakana form of reading
    if tools.is_kata(reading[0]):
        k_reading = reading
        h_reading = tools.kata_to_hira(reading)
    else:
        h_reading = reading
        k_reading = tools.hira_to_kata(reading)

    s = "SELECT * FROM reading WHERE character=? and (reading=? or reading=?)"
    rows = conn.execute(s, [char, h_reading, k_reading]).fetchall()
    # Only the first matching row is reported.
    return rows[0]["id"] if rows else None
Ejemplo n.º 3
0
    def __solve_character(self):
        """Branch on every dictionary reading of the current character.

        Fetches the readings stored for the current word character and, for
        each current branch, tests every variant of every reading against
        the word's reading starting at that branch's next reading position.
        Each variant that matches completely adds a new branch at the
        following word index. When kana that follow the character in the
        word also occur in the matched reading, a second branch covering
        those kana as well is added further along.
        """
        #replace 々 with its respective kanji
        if self.__w_char == u'々' and self.__w_index > 0:
            q_char = self.word[self.__w_index-1]
        else:
            q_char = self.__w_char

        s = "select id, reading from reading where character=?"
        # Bind as a 1-tuple: a bare string is treated as a sequence of
        # parameters, which only works while q_char has length one.
        char_readings = c.execute(s, (q_char,)).fetchall()

        # Neither value depends on the reading being tried, so compute
        # them once instead of once per dictionary reading.
        word = self.word[self.__w_index:]
        word_len = len(word) #so we don't check ahead of it

        for cr in char_readings:
            dic_r = cr['reading']
            variants = get_variants(dic_r)

            for b in self.__current_branches:
                r_index = b.next_reading
                if r_index >= len(self.reading):
                    continue

                reading = self.reading[r_index:]

                for var in variants:
                    r = var.reading
                    tags = var.tags
                    rl = var.length

                    known_r = reading[:rl]
                    kr_is_kata = is_kata(known_r[0])

                    #If only one of the two is katakana, convert that one
                    #to hiragana so we can compare them.
                    if kr_is_kata and not is_kata(r[0]):
                        known_r = kata_to_hira(known_r)
                    elif not kr_is_kata and is_kata(r[0]):
                        r = kata_to_hira(r)

                    #we want a complete match of reading to kanji reading variant
                    match_length = 0

                    #trailing kana (after kanji) in the word that is part of the reading
                    w_trail = 0
                    for (i, j) in zip(known_r, r):
                        if i == j:
                            match_length += 1
                            #also check ahead for kana in the word that could
                            #belong to this reading
                            if w_trail+1 < word_len and word[w_trail + 1] == i:
                                w_trail += 1

                    if match_length == len(r):
                        # Branch that covers only the character itself.
                        seg = Segment(tags, self.__w_char, dic_r, cr['id'],
                                      reading[:rl], word[:1])
                        n_branch = Branch(b, seg)
                        self.__branches_at[self.__w_index+1].append(n_branch)
                        self.__usable_branches += 1

                        if w_trail > 0:
                            # Alternative branch that also covers the
                            # trailing kana found in the word.
                            seg = Segment(tags, self.__w_char,
                                          dic_r, cr['id'], reading[:rl],
                                          word[:1+w_trail])
                            n_branch = Branch(b, seg)
                            self.__branches_at[self.__w_index+1+w_trail].append(n_branch)
                            self.__usable_branches += 1