Python to_hiragana 예제들, romkan.to_hiragana Python 예제들

예제 #1

0

파일 보기

파일: ja_helper.py 프로젝트: PythonNut/japanese_translation_assistant

    def maybe_potential_form(self) -> Optional[str]:
        """Guess the dictionary form this token could be a potential form of.

        A guess is only attempted when the POS string starts with "v" and
        the surface has no JMdict entries. Two shapes are tried in order:

        * a single morpheme whose dictionary form equals the surface and
          whose romaji ends in "eru": the last two characters are replaced
          by the kana of their romaji with its last three letters swapped
          for "u";
        * otherwise, a first morpheme whose romaji ends in "e": its last
          character is replaced by the kana of that character's romaji with
          its last letter swapped for "u".

        The guess is kept only when the surface is among the conjugations
        generated for it and the guess itself has JMdict entries.

        Returns:
            The guessed dictionary form, or None when no guess survives.
        """
        pos = self.pos_str()
        surface = self.surface()

        # Both shapes below require a verb.
        if pos[0] != "v":
            return None

        candidate = None
        if (len(self.morphemes) == 1
                and self.morphemes[0].dictionary_form() == surface
                and romkan.to_roma(surface).endswith("eru")
                and not jmdict_lookup(surface).entries):
            tail_roma = romkan.to_roma(surface[-2:])
            candidate = surface[:-2] + romkan.to_hiragana(tail_roma[:-3] + "u")
        else:
            head = self.morphemes[0].surface()
            if (romkan.to_roma(head).endswith("e")
                    and not jmdict_lookup(surface).entries):
                last_roma = romkan.to_roma(head[-1])
                candidate = head[:-1] + romkan.to_hiragana(last_roma[:-1] + "u")

        if not candidate:
            return None

        candidate_pos: SudachiPos = parse(candidate)[0].part_of_speech()

        # Index every generated conjugation by its conjugated string.
        conjugated = merge_multi_dicts([
            flip_multi_dict(forms)
            for forms in all_conjugations(candidate, candidate_pos).values()
        ])
        if surface not in conjugated.keys():
            return None

        if not jmdict_lookup(candidate).entries:
            return None

        return candidate

예제 #2

0

파일 보기

파일: Romautil.py 프로젝트: loxygenK/musical_typer

def get_not_halfway_hr(full_hiragana, progress_roma):
    """
    Get the correct hiragana notation even while input is still in progress.

    :param full_hiragana: the hiragana of the *whole* text
    :param progress_roma: romaji
    :return: "" when ``progress_roma`` is empty, otherwise hiragana converted
             from a tail of the romaji of ``full_hiragana``
    """

    # An empty string maps to an empty string.
    if len(progress_roma) == 0:
        return ""

    # Locate how far into the whole text the typist is.
    romaji = hira2roma(full_hiragana)
    pos = romaji.rfind(progress_roma)
    # NOTE(review): rfind returns -1 when progress_roma is not found; that
    # case is not handled and indexes from the end of the string -- confirm.

    # About to type a vowel, or a consonant was typed just before
    # (catches "ta", "ha", ...).
    at_vowel = re.match("[aeiouyn]", romaji[pos])
    if not (at_vowel or romkan.is_consonant(romaji[pos - 1])):
        return romkan.to_hiragana(romaji[pos:])

    # In the middle of a kana romanized with three letters
    # (catches "tya", "hyu", ...).
    if pos >= 2 and romkan.is_consonant(romaji[pos - 2]):
        return romkan.to_hiragana(romaji[pos - 2:])[1:]

    return romkan.to_hiragana(romaji[pos - 1:])

예제 #3

0

파일 보기

파일: sensei.py 프로젝트: djentleman/Sensei

def test_cli(lbound, ubound, count):
    """Run a command-line quiz of ``count`` words and print the score.

    Each round generates a word with ``generateWord(lbound, ubound)``,
    prints its hiragana, reads the user's romaji answer, speaks the
    hiragana via ``tts`` and reports whether the answer equals the word.
    The final line printed is ``<score>/<count>``.
    """
    correct = 0
    for _ in range(count):
        word = generateWord(lbound, ubound)
        kana = romkan.to_hiragana(word)
        print(kana)
        answer = str(raw_input("in romanji: "))
        tts(kana)
        if answer != word:
            print("Incorrect")
            print("The correct answer was: " + word)
            continue
        print("Correct")
        correct += 1
    print(str(correct) + "/" + str(count))

예제 #4

0

파일 보기

파일: jisho.py 프로젝트: SamuraiSigma/kanjisho

def search(jisho, all_mode, rounds, verbose):
    """Dictionary mode, where kanji previously read can be searched.

    With ``all_mode`` set, everything is shown via ``show_all``. Otherwise,
    with a positive ``rounds``, the quiz game is played. Otherwise every
    command-line argument is looked up: an argument that is itself a key of
    ``jisho`` is displayed directly; any other argument is compared against
    each entry's meaning, then (as hiragana) its kunyomi, then (as katakana)
    its onyomi. A hit prints one short line when ``verbose`` is False and
    displays the full entry otherwise.
    """
    if all_mode:
        show_all(jisho)
        return
    if rounds > 0:
        game(jisho, rounds)
        return

    def report(kanji, arrow, shown):
        # One-line summary in terse mode, the full entry otherwise.
        if verbose is False:
            print(kanji, arrow, shown)
        else:
            jisho[kanji]()

    for query in sys.argv[1:]:
        if query in jisho:
            jisho[query]()
            continue

        hiragana = romkan.to_hiragana(query)
        katakana = romkan.to_katakana(query)
        for kanji in jisho:
            entry = jisho[kanji]
            if query in entry.meaning:
                report(kanji, "=", query)
            elif hiragana in entry.kunyomi:
                report(kanji, "->", hiragana)
            elif katakana in entry.onyomi:
                report(kanji, "~>", katakana)

예제 #5

0

파일 보기

파일: tokenizer.py 프로젝트: sasano8/ja_text_cleaner

 def process(cls, v: str):
     """Return the reading of ``v`` as hiragana.

     The pieces returned by ``cls.reading_form(v)`` are joined with an
     ideographic space, converted to Hepburn romaji and then to hiragana.
     """
     joined = "　".join(cls.reading_form(v))
     # Half-width katakana is not handled here, so it has to be normalised
     # before this step (note carried over from the original author).
     hepburn = romkan.to_hepburn(joined)
     # to_hiragana only reacts to romaji input, hence the Hepburn detour.
     return romkan.to_hiragana(hepburn)

예제 #6

0

파일 보기

파일: Utils.py 프로젝트: MaxObliviate/Max

def map_dict_form_to_different_ending(verb, romaji_ending, *special_endings):
    '''Attach a new ending to the stem of a Godan verb, chosen by its last kana.

    Args:
        verb (str): Japanese verb in kana, might contain kanji
        romaji_ending (str): target sound of the ending, used for every last
            kana other than the three special cases
        *special_endings: endings used verbatim when the last kana is the
            -u, -tsu or -su particle, in that order

    Returns:
        str: the verb stem followed by the selected ending

    Raises:
        IndexError: if a special ending is needed but was not supplied
    '''
    last_kana = splice_verb(verb, VerbClass.GODAN, False)
    verb_stem = splice_verb(verb, VerbClass.GODAN)

    if last_kana == U_PARTICLE:
        ending = special_endings[0]
    elif last_kana == TSU_PARTICLE:
        ending = special_endings[1]
    elif last_kana == SU_PARTICLE:
        ending = special_endings[2]
    else:
        # Keep the romaji of the last kana minus its final letter and put
        # the requested ending after it, then go back to kana.
        consonant = romkan.to_roma(last_kana)[:-1]
        ending = romkan.to_hiragana("{}{}".format(consonant, romaji_ending))

    return "{}{}".format(verb_stem, ending)

예제 #7

0

파일 보기

파일: commands.py 프로젝트: josephsurin/shinmeikai

async def search(client, message, smk_dict):
    """Search the dictionary and post the first page of results.

    The query is the second whitespace-separated token of the message; it is
    passed through ``to_hiragana`` unless its first character is kana.
    Matches are grouped into pages: a match joins the current page while the
    summed length of the third field of the page's matches plus its own
    stays below 200, otherwise it starts a new page.

    Returns:
        None when nothing matched (a "no results" embed is sent), otherwise
        a ``(message id, SearchObj)`` tuple for the posted result message.
    """
    query = message.content.split()[1]
    if not is_kana(query[0]):
        query = to_hiragana(query)

    started = time.time()
    matches = dict_search(query, smk_dict)
    elapsed = time.time() - started
    print(f'found {len(matches)} words in {elapsed} seconds')

    if len(matches) == 0:
        not_found = discord.Embed(
            description=f'No results found for **{query}**!',
            color=0x62f7f7)
        await message.channel.send(embed=not_found)
        return None

    pages = []
    for match in matches:
        if pages and (sum(len(entry[2]) for entry in pages[-1])
                      + len(match[2]) < 200):
            pages[-1].append(match)
        else:
            pages.append([match])

    current_page = 0
    # NOTE(review): the last argument is current_page + 1 + len(pages), not
    # len(pages) -- confirm against create_page's signature.
    embed = create_page(pages[current_page], query, current_page + 1,
                        current_page + 1 + len(pages))

    msg = await message.channel.send(embed=embed)

    for arrow in ('⬅', '➡'):
        await msg.add_reaction(arrow)

    return (msg.id, SearchObj(pages, query, msg))

예제 #8

0

파일 보기

def subvert(expr, t):
    """Convert romaji that may contain long-vowel diacritics into kana.

    o-macron and o-circumflex become "ou", u-macron "uu", e-macron "ee" and
    a-macron "aa"; the result is then converted to katakana when ``t``
    equals ``KATA`` and to hiragana otherwise.

    Args:
        expr: romaji text.
        t: target script selector, compared against ``KATA``.

    Returns:
        The kana string produced by romkan.
    """
    import romkan
    # Flags must be passed by keyword: the fourth positional parameter of
    # re.sub is ``count``, so a positional re.UNICODE would silently cap the
    # number of replacements at 32 and set no flag at all.
    expr = re.sub(u'\u014D|\u00F4', 'ou', expr, flags=re.UNICODE)
    expr = re.sub(u'\u016B', 'uu', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0113', 'ee', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0101', 'aa', expr, flags=re.UNICODE)
    if t == KATA:
        return romkan.to_katakana(expr)
    return romkan.to_hiragana(expr)

예제 #9

0

파일 보기

    def clean(self):
        """Fill in whichever of hiragana/romaji is missing and set the slug.

        Raises:
            ValidationError: if neither ``hiragana`` nor ``romaji`` is set.
        """
        # This check has to come first: placed after the single-field
        # checks it can never be reached, because an empty ``hiragana``
        # already selects the first of them.
        if not self.hiragana and not self.romaji:
            raise ValidationError(
                "You have to enter either the Hiragana or Romaji of a Word")

        if not self.hiragana:
            self.hiragana = romkan.to_hiragana(self.romaji)
        elif not self.romaji:
            self.romaji = romkan.to_roma(self.hiragana).capitalize()

        self.slug = slugify(self.romaji)

예제 #10

0

파일 보기

def converter():
    """Refresh the hiragana and katakana entries from the romaji entry."""
    frase = ent_romaji.get()

    targets = (
        (ent_hiraga, romkan.to_hiragana),
        (ent_kataka, romkan.to_katakana),
    )
    for entry, convert in targets:
        # Replace whatever the entry currently shows.
        entry.delete(0, tk.END)
        entry.insert(0, convert(frase))

예제 #11

0

파일 보기

파일: test.py 프로젝트: nasenag/joyodb

def lemmatize_with_mecab(expression, kanji):
    '''Find the first word containing kanji; return (lemma, reading).

    Walks the MeCab node list for ``expression``. Feature field 10 is used
    as the lemma, and field 6, round-tripped through romaji into hiragana,
    as the reading.

    Raises:
        ValueError: if no node's lemma contains ``kanji``.
    '''
    node = mecab_tagger.parseToNode(expression)
    while node:
        fields = node.feature.split(',')
        lemma = fields[10]
        if kanji in lemma:
            return lemma, romkan.to_hiragana(romkan.to_roma(fields[6]))
        node = node.next
    raise ValueError("Mecab failed: %s, %s" % (expression, kanji))

예제 #12

0

파일 보기

파일: test.py 프로젝트: leoboiko/joyodb

def lemmatize_with_mecab(expression, kanji):
    '''Find the first word containing kanji; return (lemma, reading).

    Walks the MeCab node list for ``expression``. Feature field 10 is used
    as the lemma, and field 6, round-tripped through romaji into hiragana,
    as the reading.

    Raises:
        ValueError: if no node's lemma contains ``kanji``.
    '''
    node = mecab_tagger.parseToNode(expression)
    while node:
        fields = node.feature.split(',')
        lemma = fields[10]
        if kanji in lemma:
            return lemma, romkan.to_hiragana(romkan.to_roma(fields[6]))
        node = node.next
    raise ValueError("Mecab failed: %s, %s" % (expression, kanji))

예제 #13

0

파일 보기

파일: views.py 프로젝트: kimuraz/kanji-benkyou

def romaji_to_kana(request):
    """
    Convert the ``word`` query parameter to both hiragana and katakana.

    Only the first 1000 characters of the parameter are used; a missing
    parameter is treated as an empty string. Responds with HTTP 200.
    """
    word = request.query_params.get('word', '')[0:1000]
    payload = {
        'hiragana': romkan.to_hiragana(word),
        'katakana': romkan.to_katakana(word),
    }
    return Response(payload, status=status.HTTP_200_OK)

예제 #14

0

파일 보기

파일: ui.py 프로젝트: djentleman/Sensei

    def submit(self):
        """Grade the typed answer, then move on to the next word.

        Updates the score and the coloured response label, speaks the
        current word, clears the input, picks and displays a new word and
        refreshes the round counter. Once ``curr`` reaches ``count`` the
        final tally is printed and the parent window is destroyed.
        """
        answer = self.input.get()
        kana = romkan.to_hiragana(self.word)
        shown = "  [" + kana + "] " + self.word

        if answer == self.word:
            self.score += 1
            self.response.set("Correct!" + shown)
            self.lblres.config(background="green")
        else:
            self.response.set("Incorrect!" + shown)
            self.lblres.config(background="red")
        sensei.tts(kana)

        # Set up the next round.
        self.input.set("")
        self.word = sensei.generateWord(self.lbound, self.ubound)
        self.kana.set(romkan.to_hiragana(self.word))
        self.curr += 1

        tally = str(self.score)+"/"+str(self.count)+" ("+str(self.curr)+")"
        self.round.set(tally)

        if self.curr >= self.count:
            print(tally)
            self.parent.destroy()

예제 #15

0

파일 보기

파일: commands.py 프로젝트: josephsurin/shinmeikai

async def ojad_phrase(client, message):
    """Post a screenshot of the OJAD phrasing result for the message text.

    Everything after the first token of the message is concatenated into
    the query, which is passed through ``to_hiragana`` unless its first
    character is kana. A headless Chrome submits the query to the OJAD
    phrasing page, the page is screenshotted once ``phrasing_main`` is
    visible, the screenshot is cropped to that element and sent to the
    channel. On any failure an error embed is sent instead.
    """
    search_q = ''.join(message.content.split()[1:])
    if not is_kana(search_q[0]):
        search_q = to_hiragana(search_q)
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("headless")
        options.add_argument("window-size=1600x6000")
        options.binary_location = os.getenv('CHROME_BIN')
        driver = webdriver.Chrome(
            options=options, executable_path=os.getenv('CHROMEDRIVER_PATH'))

        # Close the browser even when the page fails to load or the wait
        # times out; otherwise every failed request leaves a browser open.
        try:
            driver.get('https://www.gavo.t.u-tokyo.ac.jp/ojad/phrasing/index')
            search_element = driver.find_element_by_id('PhrasingText')
            search_element.send_keys(search_q)
            search_element.submit()

            WebDriverWait(driver, 8).until(
                EC.visibility_of_element_located((By.ID, 'phrasing_main')))

            element = driver.find_element_by_id("phrasing_main")

            location = element.location
            size = element.size

            img_filename = 'tmp/' + str(getrandbits(32)) + '.png'

            driver.save_screenshot(img_filename)
        finally:
            driver.close()

        # Remove the temporary screenshot whether or not cropping and
        # sending succeed.
        try:
            left = location['x']
            top = location['y']
            right = location['x'] + size['width']
            bottom = location['y'] + size['height']
            im = Image.open(img_filename)
            # The fixed offsets trim the top and bottom of the element.
            im = im.crop((int(left), int(top) + 55, int(right),
                          int(bottom) - 50))
            im.save(img_filename)

            print('saved image to', img_filename)

            await message.channel.send(file=discord.File(img_filename))
        finally:
            if os.path.exists(img_filename):
                os.remove(img_filename)
    except Exception as e:
        print(e)
        embed = discord.Embed(
            description=
            f'An error occured while trying to query for the OJAD phrasing for **{search_q}**!',
            color=0x62f7f7)
        await message.channel.send(embed=embed)

예제 #16

0

파일 보기

파일: CodePointProcessor.py 프로젝트: polluks/Perception-IME

def parseJapanese(tokens,lines,glyph):
	"""Build the Japanese ReadingsTree<->Kanji Mappings.

	The third token is split on spaces into readings. For each reading one
	line is printed: the glyph and the reading's hiragana, both encoded to
	ASCII with backslash escapes. A reading that romkan cannot convert is
	used unchanged; if the line cannot be built, a ``BROKEN=`` line with
	the raw reading is printed instead. ``lines`` is not used.
	"""
	for reading in tokens[2].split(" "):
		try:
			kana = romkan.to_hiragana(reading)
		except:
			kana = reading
		try:
			print(glyph.encode("ascii","backslashreplace")+" "+kana.encode("ascii","backslashreplace"))
		except:
			print(glyph.encode("ascii","backslashreplace")+" BROKEN="+reading)

예제 #17

0

파일 보기

파일: main.py 프로젝트: WeiZhou2372/Wox.Plugin.Rom2Kana

 def query(self, key):
     """Return a single result whose title is ``key`` converted to hiragana.

     The result carries a JSON-RPC "copy" action.
     """
     # NOTE(review): the copy action receives the raw ``key``, not the
     # hiragana shown as the title -- confirm this is intended.
     copy_action = {
         "method": "copy",
         "parameters": [key],
         "dontHideAfterAction": False
     }
     return [{
         "Title": romkan.to_hiragana(key),
         "SubTitle": "Copy to Clipboard",
         "IcoPath": "Images\\icon.png",
         "JsonRPCAction": copy_action
     }]

예제 #18

0

파일 보기

파일: fileWriter.py 프로젝트: vanstorm9/AI-vocaloid-kit

def fileWriter(rootPath):
    """Derive romaji and hiragana transcripts from ``<rootPath>jap.txt``.

    Writes three files next to the source: ``result.txt`` (each
    non-blank line with round brackets stripped), ``romaji.txt`` and
    ``hiragana.txt``. Lines for which ``only_roman_chars`` is true are
    copied unchanged into both transcripts; any other line is converted
    with ``kanji_to_romaji``, terminated with ``.`` and a newline, and
    additionally converted to hiragana. Processing stops after a converted
    line that contains the text ``str``.

    Args:
        rootPath: prefix prepended verbatim to the four file names, so it
            needs a trailing path separator when it names a directory.
    """
    tf_str = rootPath + 'romaji.txt'
    result_str = rootPath + 'result.txt'
    t_hira_str = rootPath + 'hiragana.txt'
    sf_str = rootPath + 'jap.txt'

    # Full-width brackets first, then ASCII brackets together with the
    # adjacent space, then any remaining bare ASCII brackets.
    brackets = ('（', '）', ' (', ') ', '(', ')')

    # One ``with`` for all four files so every handle is closed even when a
    # conversion raises part-way through the input.
    with open(tf_str, 'w') as text_file, \
            open(result_str, 'w') as result_file, \
            open(t_hira_str, 'w') as text_fileHira, \
            open(sf_str) as sourceFile:
        for line in sourceFile:
            if line.isspace():
                continue

            for bracket in brackets:
                line = line.replace(bracket, '')

            result_file.write(line)

            if only_roman_chars(line):
                text_file.write(line)
                text_fileHira.write(line)
                continue

            line = kanji_to_romaji(line) + '.\n'
            lineHira = romkan.to_hiragana(line)

            text_file.write(line)
            text_fileHira.write(lineHira)

            if 'str' in line:
                break

예제 #19

0

파일 보기

파일: main.py 프로젝트: EmreTekinalp/Qt

 def default_to_hiragana(self, row, col):
     """Convert selected cells to hiragana and record the cell at (row, col).

     Selected items in a non-zero column are rewritten as hiragana unless
     ``lang_mode`` is set. The text of the item at ``(row, col)`` is then
     stored in ``page_data`` under the key ``"row,col"`` -- as romaji when
     ``col`` is non-zero -- and the page data is saved through
     ``update_config``.
     """
     for selected in self.tableWidget.selectedItems() or ():
         if selected.column() and not self.lang_mode:
             selected.setText(romkan.to_hiragana(selected.text()))

     cell = self.tableWidget.item(row, col)
     if cell:
         key = '%s,%s' % (row, col)
         self.page_data[key] = cell.text()
         if col:
             self.page_data[key] = romkan.to_roma(cell.text())

     self.data['page_%s' % self.window().pageLab.text()] = self.page_data
     self.update_config(self.data)

예제 #20

0

파일 보기

파일: main.py 프로젝트: kraftydinosaur/Wox.Plugin.Rom2Kana

 def query(self, key):
     """Return a single result whose title is ``key`` converted to hiragana.

     The result carries a JSON-RPC "copy" action.
     """
     # NOTE(review): the copy action receives the raw ``key``, not the
     # hiragana shown as the title -- confirm this is intended.
     copy_action = {
         "method": "copy",
         "parameters": [key],
         "dontHideAfterAction": False
     }
     return [{
         "Title": romkan.to_hiragana(key),
         "SubTitle": "Copy to Clipboard",
         "IcoPath": "Images\\icon.png",
         "JsonRPCAction": copy_action
     }]

예제 #21

0

파일 보기

    def replace_roman_to_kana(cls, msg: str) -> str:
        '''
        Replace romaji in the message with its kana reading.
        Example: ninja -> にんじゃ
        Note: English words are converted as if they were romaji, too.
        '''
        text = msg
        # Each match of cls.re_roma replaces its first remaining occurrence.
        for token in cls.re_roma.findall(text):
            text = text.replace(token, romkan.to_hiragana(token), 1)
        return text

예제 #22

0

파일 보기

 def kana(self, msg, args):
     """Converts Romazi to kana.

     The arguments are joined with spaces into one word. An all-uppercase
     word becomes katakana, an all-lowercase word hiragana. With no
     arguments, or a word that is neither, a fixed complaint is returned.
     """
     fallback = "Am I supposed to guess the word you want?..."
     if len(args) == 0:
         return fallback

     word = args[0] if len(args) == 1 else " ".join(args)
     if word is None:
         return fallback

     if word.isupper():
         return romkan.to_katakana(word)
     if word.islower():
         return romkan.to_hiragana(word)
     return fallback

예제 #23

0

파일 보기

 async def romkan(self, ctx, *, text: commands.clean_content):
     """Convert romaji into hiragana or katakana, or vice-versa.

     The target is taken from a leading ``hg ``, ``kk `` or ``ro `` in the
     text, otherwise from the name the command was invoked with. Invoked
     as plain ``romkan`` without such a prefix, a usage hint is sent.
     """
     prefix = text[:3]
     if prefix in ["hg ", "kk ", "ro "]:
         mode, text = prefix[:2], text[3:]
     elif ctx.invoked_with == "romkan":
         return await ctx.send(
             "Please either use `!hg`, `!kk` or `!ro` (for hiragana, katakana and romaji respectively), or pass the type as an argument: `!romkan hg LyricLy wa baka desu yo`"
         )
     else:
         mode = ctx.invoked_with

     converters = {
         "hg": romkan.to_hiragana,
         "kk": romkan.to_katakana,
         "ro": romkan.to_hepburn,
     }
     # Any other mode sends nothing.
     if mode in converters:
         await ctx.send(converters[mode](text))

예제 #24

0

파일 보기

파일: spellchecker.py 프로젝트: HSunnyKim/NLP

def hiragana_candidates(word,num):
    """Print the ``num`` most probable spelling candidates for ``word``.

    ``word`` (decoded from UTF-8 if it is not already unicode) is turned
    into NFKC-normalised romaji. Candidates are the concatenated results
    of ``prob``, ``edit1_prob`` and ``edit2_prob``, ranked by their second
    field in descending order. Each is printed as rank, hiragana, romaji
    and that second field; "NO RESULT" is printed when there are none.
    """
    if not isinstance(word,unicode): #unicode check
        word = word.decode("utf8")

    romaji = unicodedata.normalize("NFKC",romkan.to_roma(word))
    print("romaji:{}".format(romaji))

    candidates = prob(romaji) + edit1_prob(romaji) + edit2_prob(romaji)
    if not candidates:
        print("NO RESULT")
        return

    ranked = sorted(candidates,key=lambda x :x[1],reverse=True)[:num]
    for rank,candidate in enumerate(ranked,1):
        cand_roma = candidate[0]
        p = candidate[1]
        kana = romkan.to_hiragana(cand_roma).encode("utf8")
        print(" {} : {:<10}{:<20} {:<}".format(rank,kana,"("+cand_roma+")",p))

예제 #25

0

파일 보기

파일: jamdictapi.py 프로젝트: didmar/jamdict-api

def to_hiragana(word: str):
    """Convert a (possibly partially typed) romaji word to hiragana.

    Returns a dict with:
        hiragana: the lower-cased word converted by romkan
        partial: the same, except that a trailing ん goes back to a plain
            "n" unless the input ended with "nn" or an apostrophe
        valid: True when no a-z letter or apostrophe is left in the result
    """
    lowered = word.lower()
    hiragana = romkan.to_hiragana(lowered)

    explicit_n = lowered.endswith(("nn", "'"))
    if hiragana.endswith("ん") and not explicit_n:
        partial = hiragana[:-1] + "n"
    else:
        partial = hiragana

    return {
        'hiragana': hiragana,
        'partial': partial,
        'valid': re.search("[a-z']", hiragana) is None,
    }

예제 #26

0

파일 보기

파일: downloader_api.py 프로젝트: klieret/anki-readings-audio

def _get_audio_entries(reading):
    """ Try to download audio files with given reading.

    The reading is converted to hiragana and offered to every downloader
    in ``downloaders``, after setting its language to "ja". A downloader
    that fails is skipped.

    :param reading: the reading
    :return: a list of download entries (type name: DownloadEntry)
    """
    retrieved_entries = []
    hiragana = romkan.to_hiragana(reading)
    field_data = JapaneseFieldData("", "", hiragana)
    for dloader in downloaders:
        dloader.language = "ja"
        try:
            dloader.download_files(field_data)
        except Exception:
            # Best effort: a failing downloader must not stop the others.
            # Catching Exception rather than everything keeps
            # KeyboardInterrupt and SystemExit working.
            continue
        retrieved_entries += dloader.downloads_list
    return retrieved_entries

예제 #27

0

파일 보기

    def process_validated_user_input(self):
        """Turn the validated romaji input into a played word, if possible.

        The input is converted to hiragana and rejected with a warning if
        any Latin letter is left over. Otherwise matching entries are
        looked up: with no match the player loses HP and is granted a free
        joker; words already used are discarded; among the remaining
        candidates (sorted by ``word_to_freqrank``) the player picks one
        when there are several. The chosen word is added and pending
        pygame events are cleared.
        """
        higana_input = romkan.to_hiragana(self.validated_user_input)
        # Check there is only hiragana. ``search`` rather than ``match``:
        # a leftover Latin letter anywhere in the string, not just at the
        # start, means the input did not convert completely.
        if re.search("[a-z]", higana_input):
            self.set_warning_msg("Invalid input !")
            return

        valid_entries_by_kanji_form, errors = self.lookup_word_entries(higana_input)

        if not valid_entries_by_kanji_form:
            if errors:
                # error message have a digit at the beginning, to get the most precise error
                error = sorted(errors)[0][1:]
                self.set_warning_msg(error)
            else:
                self.set_warning_msg("No match ! press Enter again to give up")
            self.lose_hp()
            self.free_joker = True
            return

        print(f"Found {len(valid_entries_by_kanji_form.keys())} valid entries for {higana_input}")
        # Iterate over a copy of the keys because entries are deleted.
        for word in list(valid_entries_by_kanji_form.keys()):
            if word in self.words:
                print(f"Excluding word {word}: already used before")
                del valid_entries_by_kanji_form[word]

        if not valid_entries_by_kanji_form:
            self.set_warning_msg("Already used, try something else")
            return

        candidates = sorted(valid_entries_by_kanji_form.keys(), key=word_to_freqrank)

        if len(candidates) > 1:
            new_word = self.choose_word(candidates)
            if new_word is None:
                return
        else:
            new_word = candidates[0]
            # Special render to make the user wait !
            self.render_validated_word(new_word)
            pygame.display.flip()

        # Lose the free joker if any
        self.free_joker = False

        self.add_word(new_word)
        pygame.event.clear()  # FIXME: does not prevent "double taps"

예제 #28

0

파일 보기

def show_data(char):
	"""Render the stored training images for one character.

	Every file in ``fs`` named ``<char>.jpg`` contributes a thumbnail link
	keyed by its md5; with no such file a "no data" heading is shown.
	The page heading shows ``char`` converted to hiragana.
	"""
	import romkan
	snippets = []
	for f in fs.find({'filename': '{}.jpg'.format(char)}):
		snippets.append("""
		<a class="delete-img" data-target="{}" href="#">
			<img src='/image/{}' width=80 />
		</a>
		""".format(f.md5, f.md5))
	html = "".join(snippets)

	if not html:
		html = "<h2>no data...</h2>"

	page = """
	<h1>Train Data for "{}"</h1>{}
	""".format(romkan.to_hiragana(char).encode('utf-8'), html)
	return render_template('test.html', body=unicode(page, encoding='utf-8'))

예제 #29

0

파일 보기

파일: commands.py 프로젝트: josephsurin/shinmeikai

async def ojad_index(client, message):
    """Post a screenshot of the OJAD word table for the queried word.

    The query is the second whitespace-separated token of the message; it
    is passed through ``to_hiragana`` unless its first character is kana.
    A headless Chrome opens the OJAD search URL for the query, the page is
    screenshotted, cropped to the ``word_table`` element and sent to the
    channel. On any failure a "no results" embed is sent instead.
    """
    search_q = message.content.split()[1]
    if not is_kana(search_q[0]):
        search_q = to_hiragana(search_q)
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--ignore-certificate-errors')
        options.add_argument("headless")
        options.add_argument('--lang=ja')
        options.add_argument("window-size=1600x1000")
        options.binary_location = os.getenv('CHROME_BIN')
        driver = webdriver.Chrome(
            options=options, executable_path=os.getenv('CHROMEDRIVER_PATH'))

        # Close the browser even when the table is missing or the page
        # fails to load; otherwise every failed lookup leaves one open.
        try:
            driver.get(
                'http://www.gavo.t.u-tokyo.ac.jp/ojad/search/index/display:print/sortprefix:accent/narabi1:kata_asc/narabi2:accent_asc/narabi3:mola_asc/yure:visible/curve:invisible/details:invisible/limit:20/word:'
                + search_q)

            element = driver.find_element_by_xpath("//table[@id='word_table']")

            location = element.location
            size = element.size

            img_filename = 'tmp/' + str(getrandbits(32)) + '.png'

            driver.save_screenshot(img_filename)
        finally:
            driver.close()

        # Remove the temporary screenshot whether or not cropping and
        # sending succeed.
        try:
            left = location['x']
            top = location['y']
            right = location['x'] + size['width']
            bottom = location['y'] + size['height']
            im = Image.open(img_filename)
            im = im.crop((int(left), int(top), int(right), int(bottom)))
            im.save(img_filename)
            print('saved image to', img_filename)

            await message.channel.send(file=discord.File(img_filename))
        finally:
            if os.path.exists(img_filename):
                os.remove(img_filename)
    except Exception:
        # Exception rather than a bare except, so that task cancellation
        # and interpreter shutdown are not reported as "no results".
        embed = discord.Embed(
            description=f'No results found for **{search_q}**!',
            color=0x62f7f7)
        await message.channel.send(embed=embed)

예제 #30

0

파일 보기

파일: commands.py 프로젝트: derram/berry

 def command_ja(self, event):
     '''Usage: ~ja <k/h/r> <arg> displays katakana/hiragana/romaji for a given argument, converting between romaji and kana'''
     try:
         dest, phrase = event.params.split(' ', 1)
         converters = {
             'k': romkan.to_katakana,
             'h': romkan.to_hiragana,
             # The phrase is decoded from UTF-8 before conversion to romaji.
             'r': lambda text: romkan.to_roma(text.decode('utf-8')),
         }
         convert = converters.get(dest.lower())
         if convert is None:
             # Bare raise with no active exception: it fails on purpose so
             # that the handler below reports the bad selector.
             raise
         self.send_message(event.respond, convert(phrase))
     except:
         # Report the problem to the user, then let the error propagate.
         self.send_message(event.respond, 'Invalid input, please check syntax.')
         raise

예제 #31

0

파일 보기

파일: commands.py 프로젝트: flare561/berry

 def command_ja(self, event):
     '''Usage: ~ja <k/h/r> <arg> displays katakana/hiragana/romaji for a given argument, converting between romaji and kana'''
     try:
         dest, phrase = event.params.split(' ', 1)
         converters = {
             'k': romkan.to_katakana,
             'h': romkan.to_hiragana,
             # The phrase is decoded from UTF-8 before conversion to romaji.
             'r': lambda text: romkan.to_roma(text.decode('utf-8')),
         }
         convert = converters.get(dest.lower())
         if convert is None:
             # Bare raise with no active exception: it fails on purpose so
             # that the handler below reports the bad selector.
             raise
         self.send_message(event.respond, convert(phrase))
     except:
         # Report the problem to the user, then let the error propagate.
         self.send_message(event.respond, 'Invalid input, please check syntax.')
         raise

예제 #32

0

파일 보기

파일: spellchecker.py 프로젝트: Rigeru/NLP

def hiragana_candidates(word, num):
    """Print the ``num`` most probable spelling candidates for ``word``.

    ``word`` (decoded from UTF-8 if it is not already unicode) is turned
    into NFKC-normalised romaji. Candidates are the concatenated results
    of ``prob``, ``edit1_prob`` and ``edit2_prob``, ranked by their second
    field in descending order. Each is printed as rank, hiragana, romaji
    and that second field; "NO RESULT" is printed when there are none.
    """
    if not isinstance(word, unicode):  #unicode check
        word = word.decode("utf8")

    romaji = unicodedata.normalize("NFKC", romkan.to_roma(word))
    print("romaji:{}".format(romaji))

    candidates = prob(romaji) + edit1_prob(romaji) + edit2_prob(romaji)
    if not candidates:
        print("NO RESULT")
        return

    ranked = sorted(candidates, key=lambda x: x[1], reverse=True)[:num]
    for rank, candidate in enumerate(ranked, 1):
        cand_roma = candidate[0]
        p = candidate[1]
        kana = romkan.to_hiragana(cand_roma).encode("utf8")
        print(" {} : {:<10}{:<20} {:<}".format(rank, kana,
                                               "(" + cand_roma + ")", p))

예제 #33

0

파일 보기

파일: main.py 프로젝트: itayperl/kantan

def lookup(rad_dawg, dict_dawg, pattern):
    """Look up dictionary words matching a pattern of kana and radical groups.

    Runs of upper-case letters/hyphens in the pattern are converted to
    katakana and runs of lower-case letters/hyphens to hiragana. Each token
    found by ``PATTERN_RE`` becomes one component: a ``[...]`` token is
    resolved through ``rad_dawg.lookup_kanji``, any other token is a
    one-element set. At most ``MAX_RESULTS`` words are returned.

    Returns:
        An empty list for patterns longer than 40 characters that contain
        more than 20 ``[``; otherwise the truncated lookup result.
    """
    # sanity
    if len(pattern) > 40 and pattern.count('[') > 20:
        return []

    # romaji => kana
    pattern = re.sub('[-A-Z]+', lambda m: romkan.to_katakana(m.group(0)), pattern)
    pattern = re.sub('[-a-z]+', lambda m: romkan.to_hiragana(m.group(0)), pattern)

    def to_component(token):
        # Bracketed tokens name radicals; anything else stands for itself.
        if token[0] == '[' and token[-1] == ']':
            return rad_dawg.lookup_kanji(u''.join(token[1:-1]))
        return {token}

    components = [to_component(token) for token in PATTERN_RE.findall(pattern)]
    return dict_dawg.lookup_word(components)[:MAX_RESULTS]

예제 #34

0

파일 보기

파일: signs.py 프로젝트: henne90gen/vocab_trainer

def ask_sign(syllable):
    """Prompt with a syllable, record the self-assessment, show the hiragana.

    The reply ``c`` marks the timed attempt correct and ``i`` incorrect;
    either way the hiragana for the syllable is printed afterwards.

    Returns:
        True when the user entered ``q`` or the prompt was interrupted
        (KeyboardInterrupt or CaughtSignal); otherwise None.
    """
    with Timer(syllable) as t:
        try:
            command = input(syllable)

            if command == 'q':
                return True
            if command == 'c':
                t.correct()
            elif command == 'i':
                t.incorrect()

            print(romkan.to_hiragana(syllable))
            print()

        except (KeyboardInterrupt, CaughtSignal):
            return True

예제 #35

0

파일 보기

def ocr(filename):
	"""Recognise the kana in an image file and return the text, one line per row.

	``chars`` splits the image under ``file/`` into lines, each with one or
	more candidate segmentations. Every segmentation is classified glyph by
	glyph with ``clf`` and scored; per line the best-scoring segmentation
	is kept and its glyph images are saved as ``file/<line>_<index>.jpg``.

	Returns:
		The recognised text with trailing whitespace stripped.
	"""
	import romkan
	fpath = os.path.join(
			os.path.abspath(os.path.dirname(__file__)),
			'file',
			filename)
	datalist = chars(fpath)
	ocr_str = ""
	for (line_n, line_datas) in enumerate(datalist):
		result = []
		likelihoods = []
		for datas in line_datas:
			datas, small = datas
			s = ""	# recognised string
			score = 1	# score = (product of the likelihoods) ** (1 / string length)
			for (i, data) in enumerate(datas):
				testX = encode(raw=data)
				rom = label[int(clf.predict(testX)[0])]
				proba = clf.predict_proba(testX)[0]
				score *= max(proba) # multiply in this glyph's likelihood
				# small-kana check
				if small[i] and rom in small_label:
					rom = 'x' + rom

				s += romkan.to_hiragana(rom)
			# even out the differing string lengths between segmentations
			score = score**(1./len(datas))
			print("%s (%f)" % (s, score))
			result.append(s)
			likelihoods.append(score)

		# keep the segmentation with the highest score
		like_i = np.argmax(likelihoods)
		ocr_str += (result[like_i] + '\n')

		# write the chosen segmentation's glyphs out as images; enumerate
		# gives each glyph its own position (list.index would be quadratic
		# and return the first equal element for duplicates)
		for img_n, img in enumerate(datalist[line_n][like_i][0]):
			img.save('file/{}_{}.jpg'.format(line_n, img_n))

	ocr_str = ocr_str.rstrip()
	print(u"{}".format(ocr_str))
	return ocr_str

예제 #36

0

파일 보기

    def render_prompt(self):
        """Draw the ``>`` prompt and, to its right, the input or a hint.

        The prompt sits at the bottom-left of the screen. While the user
        has typed something, it is shown as hiragana in green; otherwise a
        grey hint naming the kanji to match is shown.
        """
        self.prompt = self.large_font.render('>', True, BLUE)
        self.prompt_rect = self.prompt.get_rect(bottomleft=(0, self.screen_h))
        self.screen.blit(self.prompt, self.prompt_rect)

        if not self.user_input_value:
            # Nothing typed yet: show a message to help new players.
            text, color = f"Type a word with {self.kanji_to_match}", GRAY
        else:
            text, color = romkan.to_hiragana(self.user_input_value), GREEN

        self.user_input = self.large_font.render(text, True, color)
        self.user_input_rect = self.user_input.get_rect(topleft=self.prompt_rect.topright)
        self.screen.blit(self.user_input, self.user_input_rect)

예제 #37

0

파일 보기

파일: nihongo.py 프로젝트: steveYeah/nihongo

def play(word_map):
    """
    Quiz the user on every entry of ``word_map``, in random order.

    ``word_map`` is shuffled in place. For each entry the first field is
    spoken and printed as hiragana, an answer is obtained from ``listen``
    and compared with the second field; the verdict is spoken.

    TODO
    * Score the result
    * Less praise
    * Don't say the words
    """
    random.shuffle(word_map)
    for word in word_map:
        prompt = word[0]
        speak(prompt)
        print(romkan.to_hiragana(prompt))

        answer = listen()
        print(answer)

        if answer != word[1]:
            speak(bubu())
            speak(f'The correct answer was {word[1]}')
        else:
            speak(correct())

예제 #38

0

파일 보기

파일: main.py 프로젝트: EmreTekinalp/Qt

 def __setup_data(self):
     """Load the current page's ``config.json`` into the widgets.

     Does nothing when the file is missing or holds no data. The ``topic``
     entry fills the line edit; every other entry maps ``"row,col"`` keys
     to cell text. Cells in a non-zero column are set a second time with
     the text converted to hiragana. The loaded data is kept in
     ``self.data``.
     """
     page_dir = 'page_%s' % (self.window().stackedWidget.currentIndex() + 1)
     config_path = os.path.abspath(
         os.path.join(self.window().data_path(), page_dir, 'config.json'))
     if not os.path.exists(config_path):
         return

     with open(config_path) as data_file:
         data = json.load(data_file)
     if not data:
         return

     for key in data:
         if key == 'topic':
             self.lineEdit.setText(data[key])
             continue
         for cell, text in data[key].items():
             row_text, col_text = cell.split(',')
             raw_item = QTableWidgetItem(text)
             row, col = int(row_text), int(col_text)
             self.tableWidget.setItem(row, col, raw_item)
             if col:
                 kana_item = QTableWidgetItem(romkan.to_hiragana(text))
                 self.tableWidget.setItem(row, col, kana_item)
     self.data = data

예제 #39

0

파일 보기

파일: onoma.py 프로젝트: SwingerOfBirches/japanese_onomatopoeia

def strings(input):
    """Return hiragana, katakana, Hepburn and a compact code for the input.

    Also stores the raw input and every intermediate form in the module
    globals ``raw``, ``kunrei``, ``hiragana``, ``katakana``, ``hepburn``
    and ``onoma``. The compact code is the kunrei romaji with a fixed set
    of substitutions applied and a trailing ``tto`` replaced by ``Q``.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Using "kunrei" because hiragana <> katakana conversion doesn't work
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    substitutions = {
        'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
        'dj': 'D', 'l': 'r', 'xtu': 'Q',
        'aa': 'a-', 'ee': 'e-', 'ii': 'i-', 'oo': 'o-', 'uu': 'u-',
    }
    onoma = kunrei
    for old, new in substitutions.items():
        onoma = onoma.replace(old, new)
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))

예제 #40

0

파일 보기

def strings(input):
    """Return hiragana, katakana, Hepburn and a compact code for the input.

    Also stores the raw input and every intermediate form in the module
    globals ``raw``, ``kunrei``, ``hiragana``, ``katakana``, ``hepburn``
    and ``onoma``. The compact code is the kunrei romaji with a fixed set
    of substitutions applied and a trailing ``tto`` replaced by ``Q``.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Using "kunrei" because hiragana <> katakana conversion doesn't work
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    substitutions = {
        'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
        'dj': 'D', 'l': 'r', 'xtu': 'Q',
        'aa': 'a-', 'ee': 'e-', 'ii': 'i-', 'oo': 'o-', 'uu': 'u-',
    }
    onoma = kunrei
    for old, new in substitutions.items():
        onoma = onoma.replace(old, new)
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))

예제 #41

0

파일 보기

파일: jisho.py 프로젝트: SamuraiSigma/kanjisho

def game(jisho, rounds):
    """Plays the random kanji game!

    Each round picks a random kanji from ``jisho`` and a random question
    type (meaning, kunyomi or onyomi), reads the answer from stdin and
    prints whether it was correct.  Kunyomi answers are converted to
    hiragana and onyomi answers to katakana before being checked.

    :param jisho: mapping from kanji to an entry object exposing
        ``meaning``, ``kunyomi`` and ``onyomi`` (each must support ``in``).
    :param rounds: number of questions to ask.  With ``0`` no question is
        asked and the score is reported as 0.0%.
    """
    correct = 0
    remaining = rounds
    while remaining > 0:
        remaining -= 1
        kanji = random.choice(list(jisho.keys()))
        quiz = random.randint(0, 2)
        entry = jisho[kanji]

        # The three question types are mutually exclusive.
        if quiz == 0:
            answer = input("What does " + kanji + " mean? ")
            if answer in entry.meaning:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!", kanji, "=", entry.meaning)

        elif quiz == 1:
            answer = input("Type in one of the kunyomi of " + kanji + " : ")
            if romkan.to_hiragana(answer) in entry.kunyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
            print(kanji, "=", entry.kunyomi)

        elif quiz == 2:
            answer = input("Type in one of the onyomi of " + kanji + " : ")
            if romkan.to_katakana(answer) in entry.onyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
            print(kanji, "=", entry.onyomi)

    # Guard the percentage: with rounds == 0 the division would raise
    # ZeroDivisionError after a game in which nothing was asked.
    percent = 100 * correct / rounds if rounds else 0.0
    print(">> You got " + str(correct) + " out of " + str(rounds)
          + " (" + str(percent) + "%)!\n")

예제 #42

0

파일 보기

파일: model.py 프로젝트: leoboiko/joyodb

    def to_hiragana(self):
        """Return the reading as hiragana, even if it's On.

        On readings are converted by round-tripping through romaji:

        >>> k = Kanji('柔')
        >>> r = Reading(k, 'ニュウ')
        >>> r.to_hiragana()
        'にゅう'

        Any other kind of reading is returned unchanged:

        >>> k = Kanji('最')
        >>> r = Reading(k, 'もっとも')
        >>> r.add_examples('最も')
        >>> r.reading
        'もっと.も'
        >>> r.to_hiragana()
        'もっと.も'

        """
        if self.kind != 'On':
            return self.reading

        romaji = romkan.to_roma(self.reading)
        return romkan.to_hiragana(romaji)

예제 #43

0

파일 보기

파일: model.py 프로젝트: nasenag/joyodb

    def to_hiragana(self):
        """Return this reading written in hiragana.

        A reading whose ``kind`` is ``'On'`` is transliterated to romaji and
        from there to hiragana:

        >>> k = Kanji('柔')
        >>> r = Reading(k, 'ニュウ')
        >>> r.to_hiragana()
        'にゅう'

        For every other kind the stored reading is handed back as is:

        >>> k = Kanji('最')
        >>> r = Reading(k, 'もっとも')
        >>> r.add_examples('最も')
        >>> r.reading
        'もっと.も'
        >>> r.to_hiragana()
        'もっと.も'

        """
        is_on_reading = self.kind == 'On'
        if is_on_reading:
            as_romaji = romkan.to_roma(self.reading)
            result = romkan.to_hiragana(as_romaji)
        else:
            result = self.reading
        return result

예제 #44

0

파일 보기

파일: ui.py 프로젝트: djentleman/Sensei

 def init(self):
     """Pick a new word and refresh the kana and round displays.

     Stores the generated word in ``self.word``, sets ``self.kana`` to its
     hiragana form and ``self.round`` to ``"<score>/<count> (<curr>)"``.
     """
     self.word = sensei.generateWord(self.lbound, self.ubound)

     hiragana = romkan.to_hiragana(self.word)
     self.kana.set(hiragana)

     progress = "%s/%s (%s)" % (self.score, self.count, self.curr)
     self.round.set(progress)

예제 #45

0

파일 보기

파일: search.py 프로젝트: haitike/myougiden

def generate_search_conditions(args):
    '''Build the list of SearchConditions to try for a query.

    args = command-line argument dict (argparse object)

    One SearchConditions is produced per (regexp flag, field, extent, query)
    combination that survives the filtering below; the list preserves the
    order regexp flag -> field -> extent -> query.
    '''

    def restore_leading_hyphen(s):
        # romkan will convert ASCII hyphen-minus to CJKV long 'ー'; we
        # back-convert it in start position, to preserve FTS operator '-'.
        if len(s) > 1 and s[0] == 'ー':
            return '-' + s[1:]
        return s

    # Which regexp modes to try.
    if args.regexp:
        regexp_flags = (True,)
    elif tt.has_regexp_special(args.query_s):
        regexp_flags = (False, True)
    else:
        regexp_flags = (False,)

    # Which fields to search.
    if args.field != 'auto':
        fields = (args.field,)
    elif tt.is_kana(args.query_s):
        fields = ('kanji', 'reading')
    else:
        fields = ('kanji', 'reading', 'gloss')

    # Which match extents to try.
    auto_extent = args.extent == 'auto'
    if auto_extent:
        extents = ('whole', 'word', 'beginning', 'partial')
    else:
        extents = (args.extent,)

    conditions = []

    for regexp in regexp_flags:
        for field in fields:
            japanese_field = field in ('kanji', 'reading')

            for extent in extents:
                if auto_extent and field == 'gloss' and extent == 'beginning':
                    # in auto guesses a gloss query such as 'man' is not
                    # tried as a prefix match (which would pull in e.g.
                    # 'manatee').
                    continue

                if japanese_field and extent == 'word':
                    if auto_extent:
                        # useless combination generated, skip
                        continue
                    # useless combination requested, adjust
                    extent = 'whole'

                if field == 'reading' and tt.is_latin(args.query_s):
                    # 'reading' field auto-convert romaji to kana. as of
                    # this writing, JMdict has no romaji in reading fields.
                    # Both a hiragana and a katakana variant are searched.
                    queries = [
                        [restore_leading_hyphen(to_kana(s))
                         for s in args.query]
                        for to_kana in (romkan.to_hiragana,
                                        romkan.to_katakana)
                    ]
                else:
                    queries = (args.query,)
                # TODO: add wide-char

                conditions.extend(
                    SearchConditions(args, query, regexp, field, extent)
                    for query in queries
                )

    return conditions

예제 #46

0

파일 보기

파일: trans.py 프로젝트: ningirsu/taemin

 def _to_hiragana(self):
     """Return ``self.word`` converted to hiragana, encoded as UTF-8 bytes."""
     hiragana = romkan.to_hiragana(self.word)
     encoded = hiragana.encode("utf-8")
     return encoded

예제 #47

0

파일 보기

파일: ime.py 프로젝트: renormalizable/ircbot

async def gimnew(arg, send):
    """Run ``arg['text']`` through the input method named by ``arg['lang']``.

    The text is split into ``(flag, string)`` segments: runs of keystroke
    characters (further split on ``'``) are flagged ``True``, everything
    else -- separators and text wrapped in doubled apostrophes ``''...''`` --
    is flagged ``False``.  For ``ja`` the ``True`` segments are converted to
    hiragana, for ``zhuyinbb`` they are remapped through a key table, and
    the segment list is then handed to ``GIMNEW(itc)`` together with
    ``send``.

    :param arg: mapping with ``'lang'`` (input method name or alias; a falsy
        value selects ``'chs'``) and ``'text'`` (the raw input).
    :param send: passed through unchanged to the ``GIMNEW`` instance.
    :raises Exception: if the (alias-resolved) language is not in the table.
    """
    table = {
        # pinyin
        'pinyins':          'zh-t-i0-pinyin',
        'pinyint':          'zh-hant-t-i0-pinyin',
        # wubi
        'wubi':             'zh-t-i0-wubi-1986',
        # shuangpin
        'shuangpinabc':     'zh-t-i0-pinyin-x0-shuangpin-abc',
        'shuangpinms':      'zh-t-i0-pinyin-x0-shuangpin-ms',
        'shuangpinflypy':   'zh-t-i0-pinyin-x0-shuangpin-flypy',
        'shuangpinjiajia':  'zh-t-i0-pinyin-x0-shuangpin-jiajia',
        'shuangpinziguang': 'zh-t-i0-pinyin-x0-shuangpin-ziguang',
        'shuangpinziranma': 'zh-t-i0-pinyin-x0-shuangpin-ziranma',
        # zhuyin
        'zhuyin':           'zh-hant-t-i0-und',
        # for blackberry layout
        'zhuyinbb':         'zh-hant-t-i0-und',
        # cangjie
        'cangjie':          'zh-hant-t-i0-cangjie-1982',
        # yue
        'yue':              'yue-hant-t-i0-und',
        # ja
        'ja':               'ja-t-ja-hira-i0-und',
    }
    alias = {
        # default
        'chs':              'pinyins',
        'cht':              'pinyint',
        'pinyin':           'pinyins',
        'shuangpin':        'shuangpinflypy',
        'udpn':             'shuangpinziranma',
        # less is more
        'py':               'pinyins',
        'wb':               'wubi',
        'sp':               'shuangpinflypy',
        'zrm':              'shuangpinziranma',
        'zy':               'zhuyin',
        'cj':               'cangjie',
        # alias
        'ggtt':             'wubi',
        'vtpc':             'shuangpinabc',
        'udpnms':           'shuangpinms',
        'ulpb':             'shuangpinflypy',
        'ihpl':             'shuangpinjiajia',
        'igpy':             'shuangpinziguang',
        'udpnzrm':          'shuangpinziranma',
        '5j4up=':           'zhuyin',
        'rhnyoo$':          'zhuyinbb',
        'oiargrmbc':        'cangjie',
        'yut':              'yue',
        # alt
        'jp':               'ja',
    }

    def parse(reg, text, f, g):
        # Walk the matches of `reg` over `text`: each stretch between matches
        # goes through `f`, each match through `g`.  Both return lists of
        # segments, which are concatenated in document order.
        line = []
        pos = 0
        for m in reg.finditer(text):
            line.extend(f(text[pos:m.start()]))
            line.extend(g(m.group()))
            pos = m.end()
        line.extend(f(text[pos:]))

        return line

    def replace(text, rule):
        # Apply the (from, to) rules in order.  The text is split on the
        # first rule's key and the remaining rules are applied only to the
        # pieces in between, so already-substituted output is never touched
        # by a later rule.
        if not rule:
            return text
        (f, t) = rule[0]
        parts = text.split(f)
        return t.join(replace(part, rule[1:]) for part in parts)

    def keystrokes(chunk):
        # A non-empty run of keystroke characters is split on "'" into
        # separate segments flagged True; an empty run yields one False
        # segment.
        if chunk != '':
            return [(True, piece) for piece in chunk.split("'")]
        return [(False, chunk)]

    def literal(chunk):
        # Text that must be passed through untouched.
        return [(False, chunk)]

    # Only a failed lookup means "unsupported input method"; anything else
    # is left to propagate instead of being swallowed by a bare except.
    try:
        lang = arg['lang'] or 'chs'
        lang = alias.get(lang, lang)
        itc = table[lang]
    except KeyError as err:
        raise Exception("Do you REALLY need this input method?") from err

    # Characters (as a regex character-class body) that count as keystrokes
    # for the selected layout; everything else separates segments.
    keys = {
        'zhuyin':   r"a-z'0-9\-;,./=",
        'zhuyinbb': r"a-z'0$",
        'ja':       r"a-z'\-",
    }.get(lang, r"a-z'")
    sep = re.compile(r"([^" + keys + r"]+)")
    # ''...'' not adjacent to keystroke characters marks literal text.
    comment = re.compile(
        r"(?:(?<=[^" + keys + r"])|^)''(.*?)''(?:(?=[^" + keys + r"])|$)"
    )

    text = arg['text']

    line = parse(
        comment, text,
        lambda t: parse(sep, t, keystrokes, literal),
        # strip the surrounding '' pair from the literal
        lambda t: [(False, t[2:-2])],
    )

    if lang == 'ja':
        line = [
            (e[0], romkan.to_hiragana(e[1])) if e[0] else e
            for e in line
        ]
    elif lang == 'zhuyinbb':
        # Key remapping for the blackberry layout.  Doubled letters come
        # first so they are consumed before their single-letter rules.
        bb_rules = [
            ('aa', 'z'),
            ('dd', 'f'),
            ('ee', 'r'),
            ('ii', 'o'),
            ('jj', ','),
            ('kk', '.'),
            ('ll', '/'),
            ('oo', 'p'),
            ('qq', 'q'),
            ('rr', 't'),
            ('ss', 'x'),
            ('uu', 'i'),
            ('ww', 'w'),
            ('xx', 'v'),
            ( 'a', 'a'),
            ( 'b', 'm'),
            ( 'c', 'b'),
            ( 'd', 'd'),
            ( 'e', 'e'),
            ( 'f', 'g'),
            ( 'g', 'h'),
            ( 'h', 'j'),
            ( 'i', '9'),
            ( 'j', 'k'),
            ( 'k', 'l'),
            ( 'l', ';'),
            ( 'm', '7'),
            ( 'n', '4'),
            ( 'o', '0'),
            ( 'p', '-'),
            ( 'q', '1'),
            ( 'r', '5'),
            ( 's', 's'),
            ( 't', 'y'),
            ( 'u', '8'),
            ( 'v', 'n'),
            ( 'w', '2'),
            ( 'x', 'c'),
            ( 'y', 'u'),
            ( 'z', '6'),
            ( '0', '3'),
            ( '$', '='),
        ]
        line = [
            (e[0], replace(e[1], bb_rules)) if e[0] else e
            for e in line
        ]
    print(line)

    im = GIMNEW(itc)
    await im(line, send)

예제 #48

0

파일 보기

파일: ime.py 프로젝트: renormalizable/ircbot

async def kana(arg, send):
    """Call ``send`` with the hiragana conversion of ``arg['romaji']``."""
    romaji = arg['romaji']
    hiragana = romkan.to_hiragana(romaji)
    send(hiragana)

예제 #49

0

파일 보기

파일: gui.py 프로젝트: Braz3n/JapaneseConjugationFlashCards

 def __convertToHiragana(self, match):
     """Return the hiragana for a regex match, leaving a lone "n" as is.

     ``match`` must provide ``group()``; a matched text of exactly ``"n"``
     is returned unconverted, anything else goes through ``to_hiragana``.
     """
     matched = match.group()
     return "n" if matched == "n" else to_hiragana(matched)

예제 #50

0

파일 보기

파일: main.py 프로젝트: kraftydinosaur/Wox.Plugin.Rom2Kana

 def copy(self, text):
     """Copy the hiragana conversion of ``text`` to the clipboard."""
     hiragana = romkan.to_hiragana(text)
     clipboard.copy(hiragana)

예제 #51

0

파일 보기

파일: generator.py 프로젝트: vforgione/kana_quiz

        alt = '"TI"'
    elif i == 17:
        char = 'TSU'
        alt = '"TU"'
    elif i == 52:
        char = 'JI'
        alt = '"ZI"'
    elif i == 57:
        char = 'JI'
        alt = '"DI"'
    elif i == 58:
        char = 'ZU'
        alt = '"DU"'

    if alt is None:
        alt = 'null'

    fixture = '{ "model": "kana.character", "pk": %(pk)s, "fields": { "romaji": "%(romaji)s", ' \
              '"hiragana": "%(hiragana)s", "katakana": "%(katakana)s", "is_plain": %(is_plain)s,' \
              '"is_dakuten": %(is_dakuten)s, "is_handakuten": %(is_handakuten)s, "is_youon": %(is_youon)s,' \
              '"gojuon_row": %(gojuon_row)s, "gojuon_col": %(gojuon_col)s, "alternate_romaji": %(alt)s, ' \
              '"notes": null } },'
    args = {
        'pk': i, 'romaji': char, 'hiragana': romkan.to_hiragana(char), 'katakana': romkan.to_katakana(char),
        'is_plain': str(is_plain).lower(), 'is_dakuten': str(is_dakuten).lower(),
        'is_handakuten': str(is_handakuten).lower(), 'is_youon': str(is_youon).lower(),
        'gojuon_row': str(gojuon_row), 'gojuon_col': str(gojuon_col), 'alt': alt
    }

    print(fixture % args)

예제 #52

0

파일 보기

파일: json_to_hiragana.py 프로젝트: hmrm/terminal-poetry

import sys
import json
import romkan

# Python 2 script (print statements): read a JSON array of poems from stdin
# and print the hiragana conversion of every line under
# poem['Japanese']['Text'], with an empty line after each poem.
poems = json.loads(sys.stdin.read())
for poem in poems:
    for line in poem[u'Japanese'][u'Text']:
        # Drop the spaces before converting.  Assumes `line` is a string, for
        # which Python 2's filter() returns a string again -- TODO confirm.
        print romkan.to_hiragana(filter(lambda char: char != ' ', line))
    # Separate poems with an empty line.
    print ''

예제 #53

0

파일 보기

파일: main.py 프로젝트: EmreTekinalp/Qt

 def to_hiragana(self):
     """Switch to mode 0 and rewrite every Japanese item in hiragana.

     Sets ``self.lang_mode`` to 0, then replaces the text of each item
     returned by ``self.get_japanese_items()`` with its hiragana form,
     obtained by converting the current text to romaji and back.
     """
     self.lang_mode = 0
     for widget in self.get_japanese_items():
         romaji = romkan.to_roma(widget.text())
         widget.setText(romkan.to_hiragana(romaji))

예제 #54

0

파일 보기

 def _to_hiragana(self):
     """Return ``self.word`` converted to hiragana."""
     hiragana = romkan.to_hiragana(self.word)
     return hiragana

예제 #55

0

파일 보기

파일: gen-hira.py 프로젝트: WydD/kana-test

import re
import romkan

# Collect the distinct bracketed kana readings found in the "edict2" file,
# normalised to hiragana, and write them one per line to ./hira.list.
entries = set()
with open("edict2", encoding="euc-jp") as edict:
    for i, entry in enumerate(edict):
        if i == 0:
            # The first line is never parsed (presumably a header line --
            # verify against the data file).
            continue
        # Reading in [...] before the first '/', made only of kana.
        m = re.search("^[^/]*\\[([ぁ-んァ-ン]*)\\]", entry)
        if not m:
            continue
        # Round-trip through romaji so katakana readings become hiragana.
        entries.add(romkan.to_hiragana(romkan.to_roma(m.group(1))))

# `with` guarantees the output is flushed and closed even if a write fails.
with open("./hira.list", "w") as w:
    for e in entries:
        w.write(e+"\n")

예제 #56

0

파일 보기

파일: dict.py 프로젝트: vivi168/pyJdict

import sqlite3
import sys
import romkan

con = sqlite3.connect('jdict.sqlite')
con.text_factory = str
cur = con.cursor()

if len(sys.argv) != 2:
    print "usage: python dict.py word"
    sys.exit("error: bad arguments")

word = sys.argv[1]

jpq = 'SELECT * FROM edict WHERE word LIKE "' + word + '"'
rq = 'SELECT * FROM edict WHERE kana LIKE "' + romkan.to_hiragana(unicode(word, 'utf-8')) + '"'
enq = 'SELECT * FROM edict WHERE english LIKE "%' + word + '%"'

cur.execute(jpq)
rows = cur.fetchall()
for row in rows:
    print row[0] + '【' + row[1] + '】' + row[2] +'\n'

cur.execute(rq)
rows = cur.fetchall()
for row in rows:
    print row[0] + '【' + row[1] + '】' + row[2] +'\n'

cur.execute(enq)
rows = cur.fetchall()
for row in rows:

예제 #57

0

파일 보기

파일: edict.py 프로젝트: iccanobif/KanjiLookup

 def normalizeInput(self, text):
     """Return ``text`` normalised to hiragana for lookup.

     Spaces are removed and the result is passed through
     ``romkan.to_hiragana``; that output is then lower-cased and passed
     through ``romkan.katakana_to_hiragana``.
     """
     without_spaces = text.replace(" ", "")
     converted = romkan.to_hiragana(without_spaces)
     return romkan.katakana_to_hiragana(converted.lower())