def _finish_ust_note(instance, bpm):
    """Normalise one parsed UST section in place.

    Updates the running tempo from the section's own ``Tempo`` entry (if
    any), cleans up the ``Lyric`` entry, stamps the effective tempo on the
    section and marks lyric-less notes as rests.

    Args:
        instance: dict holding the key/value pairs of one ``[...]`` section.
        bpm: tempo in effect before this section.

    Returns:
        The tempo in effect after this section.
    """
    if instance.get("Tempo", None):
        # The tempo may be written with a decimal comma ("120,00").
        bpm = float(".".join(instance["Tempo"].split(",")))
    if instance.get("Lyric", None):
        # Only the last space-separated token of the lyric is kept.
        lyric = instance["Lyric"].split(" ")[-1]
        # Lyrics containing "R" or "息" are blanked out.
        if "R" in lyric or "息" in lyric:
            lyric = ""
        if hira_p.search(unicode(lyric)):
            # hira_p matched: drop any embedded ASCII letters.
            lyric = re.sub("[A-Za-z]+", "", lyric)
        else:
            # Otherwise treat the lyric as romaji and convert it.
            lyric = romkan.to_katakana(lyric)
        # Keep only characters in the ァ-ン range.
        instance["Lyric"] = re.sub(
            u"[^ァ-ン]", "", unicode(lyric)).encode("utf8")
    instance["Tempo"] = str(bpm)
    # A note without a (remaining) lyric is a rest.  ``get`` avoids the
    # KeyError the direct lookup raised for notes with no Lyric line at all.
    if "NoteNum" in instance and instance.get("Lyric", "") == "":
        instance["NoteNum"] = "rest"
    return bpm


def open_ust(file_name):
    """Parse a UST file into a list of per-section dicts.

    Every line starting with ``[`` opens a new section; ``key=value`` lines
    are stored on the current section when the key is one of ``NoteNum``,
    ``Lyric``, ``Length`` or ``Tempo``.  Each section also carries an
    ``InstanceIdx`` (the header with ``[``, ``#`` and ``]`` stripped) and the
    tempo in effect as a string.

    Args:
        file_name: path of the UST file to read.

    Returns:
        List of section dicts in file order.
    """
    song = []
    instance = {}
    bpm = 0.0
    # The context manager closes the file even if parsing raises.
    with open(file_name, "r") as ust_file:
        for strm in ust_file:
            line = strm.strip()
            if line.startswith("["):
                if instance:
                    bpm = _finish_ust_note(instance, bpm)
                    song.append(instance)
                instance = {"InstanceIdx": line.lstrip("[#").rstrip("]")}
                continue
            # Split on the first "=" only, so values that themselves
            # contain "=" no longer break the two-name unpacking.
            key, separator, value = line.partition("=")
            if not separator:
                continue
            if key == "Lyric":
                value = jaconv.hira2kata(unicode(value)).encode("utf8")
            if key in ("NoteNum", "Lyric", "Length", "Tempo"):
                instance[key] = value
    # Flush the final section, which has no following header to trigger it.
    if instance:
        bpm = _finish_ust_note(instance, bpm)
        song.append(instance)
    return song
def reading_form(cls, text):
    """Yield a full-width katakana reading for each token of *text*.

    Tokens whose first part-of-speech element is "空白" or "補助記号" are
    skipped.
    """
    ignored_pos = {"空白", "補助記号"}
    for token in cls.sudachi.tokenize(text, cls._mode):
        if token.part_of_speech()[0] in ignored_pos:
            continue
        reading = token.reading_form()
        if reading:
            # Alphanumerics may come back instead of katakana, so convert
            # them as well (e.g. "shi").
            kana = romkan.to_katakana(reading)
        else:
            # No reading available: try converting the surface form to
            # katakana with romkan (e.g. "tarou").
            kana = romkan.to_katakana(token.surface())
        # Normalise half-width katakana to full-width.  When the dictionary
        # has no reading, half-width katakana can get mixed in and
        # romkan.to_hepburn stops reacting to it.
        yield jaconv.h2z(kana, ignore="", kana=True, ascii=False, digit=False)
def search(jisho, all_mode, rounds, verbose):
    """Look up previously stored kanji, or delegate to list/game mode.

    With ``all_mode`` every entry is shown; with ``rounds > 0`` the quiz is
    started.  Otherwise each command-line argument is looked up directly as
    a key, and failing that is matched against every entry's meaning,
    kunyomi (as hiragana) and onyomi (as katakana).
    """
    if all_mode:
        show_all(jisho)
        return
    if rounds > 0:
        game(jisho, rounds)
        return

    for term in sys.argv[1:]:
        if term in jisho:
            jisho[term]()
            continue

        hiragana = romkan.to_hiragana(term)
        katakana = romkan.to_katakana(term)
        for kanji in jisho:
            # Pick the arrow and the text to print for the first field
            # that matches; entries with no match are skipped.
            if term in jisho[kanji].meaning:
                arrow, shown = "=", term
            elif hiragana in jisho[kanji].kunyomi:
                arrow, shown = "->", hiragana
            elif katakana in jisho[kanji].onyomi:
                arrow, shown = "~>", katakana
            else:
                continue

            if verbose is False:
                print(kanji, arrow, shown)
            else:
                jisho[kanji]()
def subvert(expr, t):
    """Convert romaji with macron/circumflex long vowels into kana.

    Long-vowel letters are first expanded to doubled ASCII vowels
    (ō/ô -> "ou", ū -> "uu", ē -> "ee", ā -> "aa") and the result is handed
    to romkan.

    Args:
        expr: romaji text, possibly containing the long-vowel letters above.
        t: target script selector; ``KATA`` selects katakana, anything else
            selects hiragana.

    Returns:
        The kana string produced by romkan.
    """
    import romkan

    # ``flags`` must be passed by keyword: the fourth positional parameter
    # of re.sub is ``count``, so the previous positional re.UNICODE silently
    # limited each call to 32 replacements and applied no flag at all.
    expr = re.sub(u'\u014D|\u00F4', 'ou', expr, flags=re.UNICODE)
    expr = re.sub(u'\u016B', 'uu', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0113', 'ee', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0101', 'aa', expr, flags=re.UNICODE)

    if t == KATA:
        kana = romkan.to_katakana(expr)
    else:
        kana = romkan.to_hiragana(expr)
    return kana
def calculatePartsKana(self, aString):
    """Return the serialised set of parts for the katakana form of *aString*.

    The string is romanised and converted to katakana, the parts of that
    kana string are collected into a set, and the set is serialised.
    """
    kana = romkan.to_katakana(romkan.to_roma(aString))
    parts = set()
    # NOTE(review): the loop runs once per space-separated word but always
    # passes the whole kana string, never the word itself -- confirm whether
    # a per-word conversion was intended.
    for _word in aString.split(" "):
        parts |= self.calculatePartsElement(kana)
    return self.serializeSet(parts)
def converter():
    """Refresh the hiragana and katakana entries from the romaji entry."""
    frase = ent_romaji.get()
    targets = (
        (ent_hiraga, romkan.to_hiragana),
        (ent_kataka, romkan.to_katakana),
    )
    for entry, convert in targets:
        # Replace whatever the entry currently shows with the new conversion.
        entry.delete(0, tk.END)
        entry.insert(0, convert(frase))
def romaji_to_kana(request):
    """ Converts romaji in either katakana or hiragana. """
    # Only the first 1000 characters of the ``word`` query parameter are used.
    word = request.query_params.get('word', '')
    word = word[0:1000]
    payload = {
        'hiragana': romkan.to_hiragana(word),
        'katakana': romkan.to_katakana(word),
    }
    return Response(payload, status=status.HTTP_200_OK)
def kana(self, msg, args):
    """Converts Romazi to kana"""
    fallback = "Am I supposed to guess the word you want?..."
    if len(args) == 0:
        return fallback

    # A single argument is used as-is; several are joined with spaces.
    word = args[0] if len(args) == 1 else " ".join(args)

    # All-uppercase input becomes katakana, all-lowercase becomes hiragana;
    # anything else (mixed case, no cased characters) gets the fallback.
    if word.isupper():
        return romkan.to_katakana(word)
    if word.islower():
        return romkan.to_hiragana(word)
    return fallback
def lookup(self, romaji, channel, use_romkan=False):
    '''
    Start converting romaji to katakana and report the result via callback.

    With use_romkan=True the conversion is done locally and self.callback
    is invoked immediately; otherwise a page fetch is started and its
    result is routed to the GetKatakana response/error handlers.
    '''
    if not use_romkan:
        # initiate a network lookup of romaji to katakana
        url = 'http://www.sljfaq.org/cgi/e2k.cgi?word={romaji}'.format(romaji=romaji)
        result = getPage(url)
        on_success = GetKatakana.HTMLResponse(self.callback, romaji, channel)
        on_failure = GetKatakana.HTMLError(self.error_callback)
        result.addCallbacks(callback=on_success, errback=on_failure)
        return

    katakana = romkan.to_katakana(romaji)
    self.callback(romaji, channel, katakana)
async def romkan(self, ctx, *, text: commands.clean_content):
    """Convert romaji into hiragana or katakana, or vice-versa."""
    prefix = text[:3]
    if prefix in ["hg ", "kk ", "ro "]:
        # The conversion type was passed as the first argument.
        tp = prefix[:2]
        text = text[3:]
    else:
        # Otherwise the name the command was invoked with selects the type.
        tp = ctx.invoked_with
        if tp == "romkan":
            return await ctx.send(
                "Please either use `!hg`, `!kk` or `!ro` (for hiragana, katakana and romaji respectively), or pass the type as an argument: `!romkan hg LyricLy wa baka desu yo`"
            )

    converter_names = {"hg": "to_hiragana", "kk": "to_katakana", "ro": "to_hepburn"}
    converter_name = converter_names.get(tp)
    # Any other invocation name sends nothing.
    if converter_name is not None:
        await ctx.send(getattr(romkan, converter_name)(text))
def command_ja(self, event):
    '''Usage: ~ja <k/h/r> <arg> displays katakana/hiragana/romaji for a given argument, converting between romaji and kana'''
    # event.params is expected to be "<k|h|r> <phrase>".  On any failure a
    # hint is sent to event.respond and the exception is re-raised.
    try:
        dest, phrase = event.params.split(' ', 1)
        dest = dest.lower()
        if dest == 'k':
            resp = romkan.to_katakana(phrase)
        elif dest == 'h':
            resp = romkan.to_hiragana(phrase)
        elif dest == 'r':
            resp = romkan.to_roma(phrase.decode('utf-8'))
        else:
            # A bare ``raise`` has no active exception to re-raise here and
            # only worked by failing itself; raise an explicit error instead.
            raise ValueError('unknown conversion target: %r' % (dest,))
        self.send_message(event.respond, resp)
    except Exception:
        # ``except Exception`` keeps KeyboardInterrupt/SystemExit from being
        # reported to the user as bad input.
        self.send_message(event.respond, 'Invalid input, please check syntax.')
        raise
def lookup(rad_dawg, dict_dawg, pattern):
    """Resolve a search pattern into dictionary words.

    Romaji runs in the pattern are converted to kana first (uppercase to
    katakana, lowercase to hiragana).  Each bracketed group is replaced by
    the kanji set returned by ``rad_dawg``; every other piece becomes a
    one-element set.  At most ``MAX_RESULTS`` words are returned.
    """
    # sanity: refuse long patterns with many bracket groups
    if len(pattern) > 40 and pattern.count('[') > 20:
        return []

    def upper_to_katakana(match):
        return romkan.to_katakana(match.group(0))

    def lower_to_hiragana(match):
        return romkan.to_hiragana(match.group(0))

    # romaji => kana
    pattern = re.sub('[-A-Z]+', upper_to_katakana, pattern)
    pattern = re.sub('[-a-z]+', lower_to_hiragana, pattern)

    components = []
    for piece in PATTERN_RE.findall(pattern):
        bracketed = piece[0] == '[' and piece[-1] == ']'
        if bracketed:
            components.append(rad_dawg.lookup_kanji(u''.join(piece[1:-1])))
        else:
            components.append({piece})
    words = dict_dawg.lookup_word(components)
    return words[:MAX_RESULTS]
def strings(input):
    """Store several transliterations of *input* in module globals.

    Sets ``raw``, ``kunrei``, ``hiragana``, ``katakana``, ``hepburn`` and
    ``onoma`` and returns the last four joined by single spaces.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    # Using "kunrei" because hiragana <> katakana conversion doesn't work
    kunrei = romkan.to_kunrei(input)
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    # Compact notation derived from the kunrei form; replacements are
    # applied one after another in the dict's iteration order.
    changes_dict = {'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
                    'dj': 'D', 'l': 'r', 'xtu': 'Q', 'aa': 'a-', 'ee': 'e-', 'ii': 'i-',
                    'oo': 'o-', 'uu': 'u-'}
    onoma = kunrei
    for old, new in changes_dict.items():
        onoma = onoma.replace(old, new)
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))
def strings(input):
    """Compute transliterations of *input* and publish them as globals.

    Assigns the module-level names ``raw``, ``kunrei``, ``hiragana``,
    ``katakana``, ``hepburn`` and ``onoma``; the return value is
    "<hiragana> <katakana> <hepburn> <onoma>".
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    # Using "kunrei" because hiragana <> katakana conversion doesn't work
    kunrei = romkan.to_kunrei(input)
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    changes_dict = {'ch': 'C', 'ty': 'T', 'sy': 'S', 'ny': 'N', 'zy': 'Z',
                    'dj': 'D', 'l': 'r', 'xtu': 'Q', 'aa': 'a-', 'ee': 'e-', 'ii': 'i-',
                    'oo': 'o-', 'uu': 'u-'}
    shortened = kunrei
    for key in changes_dict:
        shortened = shortened.replace(key, changes_dict[key])
    # A trailing "tto" collapses to a single "Q".
    onoma = shortened[:-3] + 'Q' if shortened.endswith('tto') else shortened

    pieces = [hiragana, katakana, hepburn, onoma]
    return ' '.join(pieces)
def game(jisho, rounds):
    """Run the random kanji quiz for *rounds* questions and print the score.

    Each round picks a random kanji and randomly asks for its meaning, one
    of its kunyomi (answer converted to hiragana) or one of its onyomi
    (answer converted to katakana).
    """
    correct = 0
    remaining = rounds
    while remaining > 0:
        remaining -= 1
        kanji = random.choice(list(jisho.keys()))
        quiz = random.randint(0, 2)
        entry = jisho[kanji]

        if quiz == 0:
            answer = input("What does " + kanji + " mean? ")
            if answer in entry.meaning:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!", kanji, "=", entry.meaning)
        elif quiz == 1:
            answer = input("Type in one of the kunyomi of " + kanji + " : ")
            if romkan.to_hiragana(answer) in entry.kunyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
                print(kanji, "=", entry.kunyomi)
        elif quiz == 2:
            answer = input("Type in one of the onyomi of " + kanji + " : ")
            if romkan.to_katakana(answer) in entry.onyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
                print(kanji, "=", entry.onyomi)

    percentage = 100*correct/rounds
    print(">> You got {} out of {} ({}%)!\n".format(correct, rounds, percentage))
def _finish_timed_ust_note(instance, bpm, currentBeat):
    """Normalise one parsed UST section in place, including its timing.

    Updates the running tempo from the section's own ``Tempo`` entry, adds
    start time / beat / duration when the section has a ``Length``, cleans
    up the ``Lyric`` entry and stamps the effective tempo on the section.

    Args:
        instance: dict holding the key/value pairs of one ``[...]`` section.
        bpm: tempo in effect before this section.
        currentBeat: per-tempo accumulator of lengths seen so far; it is
            updated in place.

    Returns:
        The tempo in effect after this section.

    Raises:
        ZeroDivisionError: if a section has a ``Length`` while the tempo is
            still 0.0, i.e. before any ``Tempo`` entry was seen.
    """
    if instance.get("Tempo", None):
        # The tempo may be written with a decimal comma ("120,00").
        bpm = float(".".join(instance["Tempo"].split(",")))
    if instance.get("Length", None):
        # The start position is read before this note's own length is
        # added to the accumulator.
        currentTime = get_beat2time(currentBeat)
        instance["StartTime"] = currentTime
        instance["Beat"] = get_beat2rhythm(currentBeat)
        currentBeat[bpm] += float(instance["Length"])
        m, s = divmod(currentTime, 60)
        h, m = divmod(m, 60)
        instance["StartTimeReadable"] = "%d/%d/%s" % (h, m, s)
        instance["Duration"] = str(
            60.0 / bpm * (float(instance["Length"]) / 480.0))
    if instance.get("Lyric", None):
        # extract HIRAGANA (NOTE: Sometimes VOCALOID-specific characters
        # are included); only the last space-separated token is kept.
        lyric = instance["Lyric"].split(" ")[-1]
        # Lyrics containing "R" or "息" are blanked out.
        if "R" in lyric or "息" in lyric:
            lyric = ""
        if hira_p.search(unicode(lyric)):
            lyric = re.sub("[A-Za-z]+", "", lyric)
        else:
            lyric = romkan.to_katakana(lyric)
        # Keep only characters in the ァ-ン range.
        instance["Lyric"] = re.sub(
            u"[^ァ-ン]", "", unicode(lyric)).encode("utf8")
    instance["Tempo"] = str(bpm)
    return bpm


def open_ust(file_name):
    """Parse a UST file into a list of per-section dicts with timing.

    Every line starting with ``[`` opens a new section and every
    ``key=value`` line is stored on the current section.  Each section also
    carries an ``InstanceIdx`` (the header with ``[``, ``#`` and ``]``
    stripped), the effective tempo as a string and, when it has a
    ``Length``, its start time, beat and duration.

    Args:
        file_name: path of the UST file to read.

    Returns:
        List of section dicts in file order.
    """
    song = []
    instance = {}
    currentBeat = defaultdict(float)
    bpm = 0.0
    # The context manager closes the file even if parsing raises.
    with open(file_name, "r") as ust_file:
        for strm in ust_file:
            line = strm.strip()
            if line.startswith("["):
                if instance:
                    bpm = _finish_timed_ust_note(instance, bpm, currentBeat)
                    song.append(instance)
                instance = {"InstanceIdx": line.lstrip("[#").rstrip("]")}
                continue
            # Split on the first "=" only, so values that themselves
            # contain "=" no longer break the two-name unpacking.
            key, separator, value = line.partition("=")
            if not separator:
                continue
            if key == "Lyric":
                value = jaconv.hira2kata(unicode(value)).encode("utf8")
            instance[key] = value
    # Flush the final section, which has no following header to trigger it.
    if instance:
        bpm = _finish_timed_ust_note(instance, bpm, currentBeat)
        song.append(instance)
    return song
def generate_search_conditions(args):
    '''Build the list of SearchConditions to try for a query.

    args = command-line argument dict (argparse object).  One condition is
    produced per (regexp flag, field, extent, query variant) combination,
    with useless combinations skipped or adjusted.
    '''
    auto_extent = args.extent == 'auto'

    if args.regexp:
        regexp_flags = (True,)
    elif tt.has_regexp_special(args.query_s):
        regexp_flags = (False, True)
    else:
        regexp_flags = (False,)

    if args.field != 'auto':
        fields = (args.field,)
    elif tt.is_kana(args.query_s):
        fields = ('kanji', 'reading')
    else:
        fields = ('kanji', 'reading', 'gloss')

    if auto_extent:
        extents = ('whole', 'word', 'beginning', 'partial')
    else:
        extents = (args.extent,)

    def fix_hyphen(s):
        # romkan will convert ASCII hyphen-minus to CJKV long 'ー'; convert
        # it back in start position to preserve the FTS operator '-'.
        if len(s) > 1 and s[0] == 'ー':
            s = '-' + s[1:]
        return s

    conditions = []
    for regexp in regexp_flags:
        for field in fields:
            for extent in extents:
                if auto_extent and field == 'gloss' and extent == 'beginning':
                    # when we search for e.g. 'man' in auto guesses, we
                    # typically want neither 'manatee' nor 'humanity'
                    continue
                if extent == 'word' and field in ('kanji', 'reading'):
                    if auto_extent:
                        # useless combination generated, skip
                        continue
                    # useless combination requested, adjust
                    extent = 'whole'

                if field == 'reading' and tt.is_latin(args.query_s):
                    # 'reading' field auto-converts romaji to kana. as of
                    # this writing, JMdict has no romaji in reading fields.
                    kana_variants = ([romkan.to_hiragana(s) for s in args.query],
                                     [romkan.to_katakana(s) for s in args.query])
                    queries = [[fix_hyphen(s) for s in variant]
                               for variant in kana_variants]
                else:
                    queries = (args.query,)
                # TODO: add wide-char

                conditions.extend(
                    SearchConditions(args, query, regexp, field, extent)
                    for query in queries)
    return conditions
(current[1] + 1):i] correct += " " + roma[current[0]:(current[1] + 1)] + " " + roma[ (current[1] + 1):i] current = [i, i] if current is not None: result += "[" + answer[current[0]:(current[1] + 1)] + "]" + answer[ (current[1] + 1):] correct += " " + roma[current[0]:(current[1] + 1)] + " " + roma[(current[1] + 1):] return result, correct while True: i = random.randint(0, len(words)) to_write = words[i] roma = romkan.to_roma(to_write).strip() if mode == 2: to_write = romkan.to_katakana(roma) + "\n" tries = 0 while tries < max_tries: answer = input(to_write + "> ").strip() if answer == roma: print("\tcorrect!") break else: print("\tWRONG!") tries += 1 if tries == max_tries: errors, correct = find_error(answer, roma) print("\tAnswer was " + correct + "\n\tYou wrote: " + errors)
def to_katakana(romaji: str) -> str:
    """Return *romaji* converted to katakana by romkan."""
    katakana = romkan.to_katakana(romaji)
    return katakana
def _to_katakana(self):
    """Return ``self.word`` converted to katakana by romkan."""
    word = self.word
    return romkan.to_katakana(word)
def render(self):
    """Draw the search results into ``self.win``, highlighting the query.

    Each result contributes two lines: one with its kanji and reading
    forms, one with its glosses.  Occurrences of the query (as typed, as
    hiragana and as katakana) are drawn in a second colour.  Drawing stops
    at the first ``curses.error`` and the window is then refreshed.
    """
    super(SearchResults,self).clear();
    (h,w) = self.win.getmaxyx();
    for i,entry in enumerate(self.results):
        # Normalise k_ele / r_ele to lists: a missing value becomes [],
        # a single non-list value becomes a one-element list.
        kele = entry.get("k_ele");
        kele = ([kele] if not isinstance(kele,list) else kele) if kele is not None else [];
        rele = entry.get("r_ele");
        rele = ([rele] if not isinstance(rele,list) else rele) if rele is not None else [];
        index = 0; # next index to hand out to a re_restr value
        # Map every re_restr value to a running index so restricted
        # kanji and readings can be labelled with the same number.
        kett = {};
        for r in rele:
            try:
                for restr in loopOn(r["re_restr"]):
                    if kett.get(restr) is None:
                        kett[restr] = index;
                        index += 1;
            except KeyError:
                # This reading has no re_restr entry.
                continue;
        # Construct the lines for the list view.
        # First line: kanji forms, then readings, comma-separated, each
        # followed by its restriction index in brackets where one exists.
        fln = "";
        for k in kele:
            if len(fln) > 0:
                fln += ", ";
            fln += k["keb"];
            try:
                fln += "["+str(kett[k["keb"]])+"]";
            except KeyError:
                pass;
        for r in rele:
            if len(fln) > 0:
                fln += ", ";
            fln += r["reb"];
            try:
                for restr in loopOn(r["re_restr"]):
                    fln += "["+str(kett[restr])+"]";
            except KeyError:
                pass;
        # Second line: all gloss texts of all senses, comma-separated.
        sense = entry["sense"];
        gln = "";
        for s in loopOn(sense):
            for g in loopOn(s["gloss"]):
                if len(gln) > 0:
                    gln += ", ";
                gln += g["#text"];
        # Append a heart marker plus tag name for every tag whose entry
        # collection contains this entry.
        tagged = False;
        for te in self.jisho.tagdict:
            if entry["ent_seq"] in self.jisho.tagdict[te]:
                fln += " ["+u"\u2764 "+te+"]";
                tagged = True;
        # Query variants to highlight; self.kr matches are stripped from
        # the kana conversions.
        hiragana = romkan.to_hiragana(self.query);
        hiragana = self.kr.sub("",hiragana);
        katakana = romkan.to_katakana(self.query);
        katakana = self.kr.sub("",katakana);
        qs = [(self.query,len(self.query)),(hiragana,len(hiragana)),(katakana,len(katakana))];
        for s in [fln,gln]:
            try:
                # Highlight the query.
                # c = (normal pair, highlight pair): (1,4) for the selected
                # row; otherwise (20,21) for tagged rows or a pair that
                # alternates with the row index.
                c = [(1,4), [(2+i%2,5+i%2),(20,21)][tagged] ][self.sel != i];
                q = 0;
                # Repeatedly find the earliest occurrence of any non-empty
                # query variant at or after position q.  The loop is
                # skipped entirely when the raw query is empty.
                while qs[0][1] > 0:
                    Q = -1;
                    l = +0;
                    for qe in qs:
                        if qe[1] == 0:
                            continue;
                        Q1 = s.find(qe[0],q);
                        l1 = qe[1];
                        if Q1 != -1 and (Q1 < Q or Q == -1):
                            Q = Q1;
                            l = l1;
                    if Q == -1:
                        break;
                    # Text before the match in the normal colour, the match
                    # itself in the highlight colour.
                    self.win.addstr(s[q:Q],curses.color_pair(c[0]));
                    self.win.addstr(s[Q:Q+l],curses.color_pair(c[1]));
                    q = Q+l;
                self.win.addstr(s[q:],curses.color_pair(c[0]));
                #self.win.addstr(s,curses.color_pair(c[0]));
                # Pad the rest of the row with spaces in the row's colour.
                (_,x) = self.win.getyx();
                self.win.addstr(' '*(w-x),curses.color_pair(c[0]));
            except curses.error:
                break;
        else:
            # Both lines were drawn without error: go on to the next entry.
            continue;
        # A curses.error interrupted the inner loop: stop drawing entries.
        break;
    self.win.refresh();
def process(cls, v: str):
    """Return *v* romanised to Hepburn and then converted to katakana."""
    hepburn = romkan.to_hepburn(v)
    return romkan.to_katakana(hepburn)
correct = roma[:i] elif i - 1 == current[1]: current[1] = i else: result += "[" + answer[current[0]:(current[1] + 1)] + "]" + answer[(current[1] + 1):i] correct += " " + roma[current[0]:(current[1] + 1)] + " " + roma[(current[1] + 1):i] current = [i, i] if current is not None: result += "["+answer[current[0]:(current[1]+1)]+"]"+answer[(current[1]+1):] correct += " "+roma[current[0]:(current[1]+1)]+" " + roma[(current[1]+1):] return result, correct while True: i = random.randint(0, len(words)) to_write = words[i] roma = romkan.to_roma(to_write).strip() if mode == 2: to_write = romkan.to_katakana(roma)+"\n" tries = 0 while tries < max_tries: answer = input(to_write+"> ").strip() if answer == roma: print("\tcorrect!") break else: print("\tWRONG!") tries += 1 if tries == max_tries: errors, correct = find_error(answer, roma) print("\tAnswer was "+correct+"\n\tYou wrote: "+errors)
def parse_single(txt):
    """Convert *txt* to katakana if it is all uppercase, else to hiragana."""
    convert = romkan.to_katakana if txt.isupper() else romkan.to_hiragana
    return convert(txt)
def _to_katakana(self):
    """Return ``self.word`` as katakana, encoded as UTF-8."""
    katakana = romkan.to_katakana(self.word)
    return katakana.encode("utf-8")
elif i == 57: char = 'JI' alt = '"DI"' elif i == 58: char = 'ZU' alt = '"DU"' if alt is None: alt = 'null' fixture = '{ "model": "kana.character", "pk": %(pk)s, "fields": { "romaji": "%(romaji)s", ' \ '"hiragana": "%(hiragana)s", "katakana": "%(katakana)s", "is_plain": %(is_plain)s,' \ '"is_dakuten": %(is_dakuten)s, "is_handakuten": %(is_handakuten)s, "is_youon": %(is_youon)s,' \ '"gojuon_row": %(gojuon_row)s, "gojuon_col": %(gojuon_col)s, "alternate_romaji": %(alt)s, ' \ '"notes": null } },' args = { 'pk': i, 'romaji': char, 'hiragana': romkan.to_hiragana(char), 'katakana': romkan.to_katakana(char), 'is_plain': str(is_plain).lower(), 'is_dakuten': str(is_dakuten).lower(), 'is_handakuten': str(is_handakuten).lower(), 'is_youon': str(is_youon).lower(), 'gojuon_row': str(gojuon_row), 'gojuon_col': str(gojuon_col), 'alt': alt } print(fixture % args)
def __convertToKatakana(self, match):
    """Return the uppercased katakana conversion of a regex match.

    A match that is exactly "N" is returned unchanged.
    """
    matched = match.group()
    if matched != "N":
        return to_katakana(matched).upper()
    return "N"
def onromaji_to_katakana(reading):
    """Return *reading* unchanged.

    The function used to be followed by an unreachable statement,
    ``romkan.to_katakana(reading).replace(':', 'ウ')``, which never ran
    because of the early return.  That dead code is removed here and the
    effective pass-through behaviour is kept.

    NOTE(review): if the katakana conversion is meant to be active, restore
    it deliberately instead of the pass-through -- confirm with the callers.
    """
    return reading
def generate_search_conditions(args):
    '''Return the SearchConditions to try, in order, for the given arguments.

    args = command-line argument dict (argparse object).
    '''
    # Regexp modes to try.
    if args.regexp:
        regexp_flags = (True,)
    else:
        special = tt.has_regexp_special(args.query_s)
        regexp_flags = (False, True) if special else (False,)

    # Fields to search.
    if args.field != 'auto':
        fields = (args.field,)
    else:
        if tt.is_kana(args.query_s):
            fields = ('kanji', 'reading')
        else:
            fields = ('kanji', 'reading', 'gloss')

    # Match extents to try.
    guessing_extent = args.extent == 'auto'
    if not guessing_extent:
        extents = (args.extent,)
    else:
        extents = ('whole', 'word', 'beginning', 'partial')

    def fix_hyphen(s):
        # romkan will convert ASCII hyphen-minus to CJKV long 'ー'
        # we back-convert it in start position, to preserve FTS
        # operator '-'.
        starts_with_long_mark = len(s) > 1 and s[0] == 'ー'
        return '-' + s[1:] if starts_with_long_mark else s

    conditions = []
    for regexp in regexp_flags:
        for field in fields:
            kana_field = field in ('kanji', 'reading')
            for extent in extents:
                if field == 'gloss' and extent == 'beginning' and guessing_extent:
                    # when we search for e.g. 'man' in auto guesses, we
                    # typically want neither 'manatee' nor 'humanity'
                    continue
                elif kana_field and extent == 'word':
                    if guessing_extent:
                        # useless combination generated, skip
                        continue
                    else:
                        # useless combination requested, adjust
                        extent = 'whole'

                if field == 'reading' and tt.is_latin(args.query_s):
                    # 'reading' field auto-convert romaji to kana. as of this
                    # writing, JMdict has no romaji in reading fields.
                    as_hiragana = [romkan.to_hiragana(s) for s in args.query]
                    as_katakana = [romkan.to_katakana(s) for s in args.query]
                    queries = [[fix_hyphen(s) for s in as_hiragana],
                               [fix_hyphen(s) for s in as_katakana]]
                else:
                    queries = (args.query,)
                # TODO: add wide-char

                for query in queries:
                    condition = SearchConditions(args, query, regexp, field, extent)
                    conditions.append(condition)
    return conditions
def multiscrape(self, name, shy=False):
    """Scrape the page section labelled *name* into ``self.config``.

    The text of the first matching node is collected line by line, trimmed
    and passed through ``makeHankaku``; where it is stored depends on
    *name*.  With ``shy`` set, a field that already has a truthy 'ja' value
    is left untouched.
    """
    if shy and self.config.has_key(name) and self.config[name]['ja']:
        return

    # The name and its reading live in the <h3>; everything else is found
    # next to a label containing the field name.
    from_heading = name == u'名前' or name == u'ふりがな'
    if from_heading:
        nodes = self.root.xpath("//_:h3", namespaces=NS)
    else:
        nodes = self.root.xpath("//_:li/_:strong[contains(text(), '%s')]/following-sibling::_:ul/_:li|//_:h4[contains(text(), '%s')]/following-sibling::_:p" % (name, name), namespaces=NS)
    if not nodes:
        return

    # Collect the non-empty text fragments of the first node, trimmed of
    # surrounding spaces and line breaks.
    texts = nodes[0].itertext()
    val = ''
    pieces = []
    while True:
        try:
            val = texts.next()
            val = re.sub(u'^[ \r\n]+', '', val)
            val = re.sub(u'[ \r\n]+$', '', val)
            if val:
                pieces.append(val)
        except:
            # Any exception (normally the iterator running out) ends the loop.
            break
    joined = '\n'.join(pieces)
    val = re.sub('^[ \n]*(.*?)[ \n]*$', '\\1', joined)
    val = makeHankaku(val.strip())

    if name == u'名前':
        rows = val.split('\n')
        if not self.config.has_key(name):
            self.config[name] = {}
        self.config[name]['ja'] = rows[0]
    elif name == u'ふりがな' and not shy:
        if not self.config.has_key(u'名前'):
            self.config[u'名前'] = {}
        rows = val.split('\n')
        if len(rows) <= 1:
            self.config[u'名前']['kana'] = ''
        else:
            reading = rows[1]
            suzure = reading.replace(u' ', '').replace(' ', '')
            self.config[u'名前']['kana'] = reading
            self.config[u'名前']['en'] = titleCase(reading)
            self.config[u'並べ替え']['ja'] = romkan.to_katakana(romkan.to_kunrei(suzure))
            self.config[u'並べ替え']['en'] = romkan.to_roma(suzure)
    elif name == u'所属':
        if not self.config.has_key(u'所属'):
            self.config[u'所属'] = {}
        if self.staffType == 'LS' or self.staffType == 'PRO':
            self.config[u'所属']['ja'] = u'法科大学院'
            self.config[u'所属']['en'] = 'Law School (professional course)'
    elif name == u'役職':
        if not self.config.has_key(u'役職'):
            self.config[u'役職'] = {}
        post_ja = getPostJapanese(val)
        self.config[u'役職']['ja'] = post_ja
        self.config[u'役職']['en'] = getPostEnglish(self.config[u'役職']['ja'])
    elif name == u'所属学会':
        if not self.config.has_key(u'学会'):
            self.config[u'学会'] = {}
        rows = val.split('\n')
        # Several lines are stored as a list, a single line as a string.
        self.config[u'学会']['ja'] = rows if len(rows) > 1 else val
    elif name == u'教員からのメッセージ':
        if not self.config.has_key(u'法科大学院メッセージ'):
            self.config[u'法科大学院メッセージ'] = {}
        self.config[u'法科大学院メッセージ']['ja'] = val.split('\n')
    elif name == u'リンク':
        # Use the first node that contains a link.
        for node in nodes:
            subnode = node.xpath('.//_:a[@href]', namespaces=NS)
            if subnode and len(subnode):
                anchor = subnode[0]
                self.config[u'ホームページ']['ja'] = anchor.text
                self.config[u'ホームページ'][u'リンク'] = anchor.attrib['href']
                break
    else:
        if not self.config.has_key(name):
            self.config[name] = {}
        rows = val.split('\n')
        if len(rows) > 1:
            self.config[name]['ja'] = rows
            if name == u'専門分野' and self.config[name]['ja'][0]:
                self.config[name]['en'] = fieldsMap[self.config[name]['ja'][0]]
        else:
            self.config[name]['ja'] = val
            if name == u'専門分野' and self.config[name]['ja']:
                self.config[name]['en'] = fieldsMap[self.config[name]['ja']]
alt = '"TI"' elif i == 17: char = 'TSU' alt = '"TU"' elif i == 52: char = 'JI' alt = '"ZI"' elif i == 57: char = 'JI' alt = '"DI"' elif i == 58: char = 'ZU' alt = '"DU"' if alt is None: alt = 'null' fixture = '{ "model": "kana.character", "pk": %(pk)s, "fields": { "romaji": "%(romaji)s", ' \ '"hiragana": "%(hiragana)s", "katakana": "%(katakana)s", "is_plain": %(is_plain)s,' \ '"is_dakuten": %(is_dakuten)s, "is_handakuten": %(is_handakuten)s, "is_youon": %(is_youon)s,' \ '"gojuon_row": %(gojuon_row)s, "gojuon_col": %(gojuon_col)s, "alternate_romaji": %(alt)s, ' \ '"notes": null } },' args = { 'pk': i, 'romaji': char, 'hiragana': romkan.to_hiragana(char), 'katakana': romkan.to_katakana(char), 'is_plain': str(is_plain).lower(), 'is_dakuten': str(is_dakuten).lower(), 'is_handakuten': str(is_handakuten).lower(), 'is_youon': str(is_youon).lower(), 'gojuon_row': str(gojuon_row), 'gojuon_col': str(gojuon_col), 'alt': alt } print(fixture % args)
def eval_force_romaji_to_kana_v1(self, text, kana_ref, nbest=20):
    """Score *text* after forcing any romaji in it to katakana.

    The text is converted to half-width (digits and ASCII, not kana), then
    to katakana with romkan, and the result is passed to
    ``self.eval_normal``.  If the conversion changes nothing, the constant
    12345 is returned instead.
    """
    halfwidth = jaconv.z2h(text, digit=True, ascii=True, kana=False)  # half-width
    converted = romkan.to_katakana(halfwidth)  # convert romaji where possible
    if halfwidth != converted:
        return self.eval_normal(converted, kana_ref, nbest)
    # Nothing changed: return without further processing.  The returned
    # value can be anything as long as it is sufficiently large.
    return 12345
def toKana(self, aString):
    """Return *aString* romanised by romkan and converted to katakana."""
    roma = romkan.to_roma(aString)
    return romkan.to_katakana(roma)
def execute(self, aTerm):
    """Run the raw dictionary query for *aTerm*.

    When ``self.convertKana`` is set, the term is first romanised to
    Hepburn and converted to katakana; otherwise it is used as given.
    """
    if self.convertKana:
        term = romkan.to_katakana(romkan.to_hepburn(aTerm))
    else:
        term = aTerm
    sql = query_base.format(self.select)
    return DictionaryDisplayElement.objects.raw(sql, [self.order, term])
def multiscrape(self, name, shy=False):
    """Scrape the page section labelled *name* into ``self.config``.

    The text of the first matching node is collected line by line, trimmed
    and passed through ``makeHankaku``; where it is stored depends on
    *name*.

    Args:
        name: label of the section to scrape (also the default config key).
        shy: when true, a field that already has a truthy 'ja' value is left
            untouched, and the u'ふりがな' branch is not taken.

    Returns:
        None.  Results are written to ``self.config``.
    """
    if shy and self.config.has_key(name) and self.config[name]['ja']:
        return
    # The name and its reading live in the <h3>; everything else is the
    # paragraph following an <h4> that contains the field name.
    if name != u'名前' and name != u'ふりがな':
        nodes = self.root.xpath("//_:h4[contains(text(), '%s')]/following-sibling::_:p" % name, namespaces=NS)
    else:
        nodes = self.root.xpath("//_:h3", namespaces=NS)
    if not nodes:
        return
    # Collect the non-empty text fragments of the first node, trimmed of
    # surrounding spaces and line breaks.
    iterator = nodes[0].itertext()
    val = ''
    l = []
    while 1:
        try:
            val = iterator.next()
            val = re.sub(u'^[ \r\n]+', '', val)
            val = re.sub(u'[ \r\n]+$', '', val)
            if val:
                l.append(val)
        except:
            # NOTE(review): this bare except also hides errors other than
            # the iterator running out -- left as-is to keep the
            # best-effort behaviour.
            break
    val = re.sub('^[ \n]*(.*?)[ \n]*$', '\\1', '\n'.join(l))
    val = val.strip()
    val = makeHankaku(val)
    if name == u'名前':
        lst = val.split('\n')
        if not self.config.has_key(name):
            self.config[name] = {}
        self.config[name]['ja'] = lst[0]
    elif name == u'ふりがな' and not shy:
        if not self.config.has_key(u'名前'):
            self.config[u'名前'] = {}
        lst = val.split('\n')
        if len(lst) > 1:
            # NOTE(review): the two replace() arguments look identical here;
            # the first was presumably a full-width space -- confirm against
            # the file's original encoding.
            suzure = lst[1].replace(u' ', '').replace(' ', '')
            self.config[u'名前']['kana'] = lst[1]
            self.config[u'名前']['en'] = titleCase(romkan.to_hepburn(lst[1].replace(u' ', ' ')))
            self.config[u'並べ替え']['ja'] = romkan.to_katakana(romkan.to_kunrei(suzure))
            self.config[u'並べ替え']['en'] = romkan.to_roma(suzure)
        else:
            self.config[u'名前']['kana'] = ''
    elif name == u'教員からのメッセージ':
        # The existence check used a misspelled key (u'学部メセージ'), so it
        # never matched and the entry was replaced by a fresh dict on every
        # call.  Check the same key that is assigned.
        if not self.config.has_key(u'学部メッセージ'):
            self.config[u'学部メッセージ'] = {}
        self.config[u'学部メッセージ']['ja'] = val.split('\n')
    elif name == u'役職':
        if not self.config.has_key(u'役職'):
            self.config[u'役職'] = {}
        self.config[u'役職']['ja'] = getPostJapanese(val)
        self.config[u'役職']['en'] = getPostEnglish(self.config[u'役職']['ja'])
    elif name == u'主要':
        # Several lines are stored as a list, a single line as a string.
        if len(val.split('\n')) > 1:
            self.config[u'主要業績']['ja'] = val.split('\n')
        else:
            self.config[u'主要業績']['ja'] = val
    else:
        if not self.config.has_key(name):
            self.config[name] = {}
        if len(val.split('\n')) > 1:
            self.config[name]['ja'] = val.split('\n')
            if name == u'専門分野' and self.config[name]['ja'][0]:
                self.config[name]['en'] = fieldsMap[self.config[name]['ja'][0]]
        else:
            self.config[name]['ja'] = val
            if name == u'専門分野' and self.config[name]['ja']:
                self.config[name]['en'] = fieldsMap[self.config[name]['ja']]
def to_katakana(self):
    """Switch to language mode 1 and show every Japanese item in katakana.

    Each item's text is romanised with romkan and then converted to
    katakana before being written back to the item.
    """
    self.lang_mode = 1
    for item in self.get_japanese_items():
        roma = romkan.to_roma(item.text())
        item.setText(romkan.to_katakana(roma))