Example #1
0
def _normalize_ust_section(instance, bpm):
    """Normalize one parsed UST section in place and return the tempo in effect.

    ``instance`` is the dict collected for one ``[...]`` section; ``bpm`` is
    the tempo carried over from earlier sections. The section's own tempo,
    when present and non-empty, replaces it. ``instance["Tempo"]`` is always
    rewritten to ``str(bpm)``.
    """
    if instance.get("Tempo", None):
        # Tempo may be written with a decimal comma ("120,5").
        bpm = float(".".join(instance["Tempo"].split(",")))
    if instance.get("Lyric", None):
        lyric = instance["Lyric"]
        # Keep only the last space-separated token of the lyric.
        if len(lyric.split(" ")) > 1:
            lyric = lyric.split(" ")[-1]
        # Lyrics containing "R" or "息" are blanked (handled as rests later).
        if "R" in lyric or "息" in lyric:
            lyric = ""
        # hira_p is defined elsewhere in this file.
        if hira_p.search(unicode(lyric)):
            lyric = re.sub("[A-Za-z]+", "", lyric)
        else:
            lyric = romkan.to_katakana(lyric)
        # Drop every character outside the katakana range ァ-ン.
        instance["Lyric"] = re.sub(u"[^ァ-ン]", "",
                                   unicode(lyric)).encode("utf8")
    instance["Tempo"] = str(bpm)
    return bpm


def _collect_ust_section(song, instance, bpm):
    """Finish ``instance`` and append it to ``song`` if it is a note.

    Only sections carrying a ``NoteNum`` are kept. A note whose lyric is
    empty or missing is marked as a rest. Returns the tempo in effect after
    this section.
    """
    if not instance:
        return bpm
    bpm = _normalize_ust_section(instance, bpm)
    if "NoteNum" in instance:
        # setdefault: a note without any Lyric line used to raise KeyError
        # here; treat it like an empty lyric instead.
        if instance.setdefault("Lyric", "") == "":
            instance["NoteNum"] = "rest"
        song.append(instance)
    return bpm


def open_ust(file_name):
    """Parse the UST file ``file_name`` into a list of note dicts.

    Each ``[...]`` header starts a new section. Within a section only the
    ``NoteNum``, ``Lyric``, ``Length`` and ``Tempo`` keys are kept, plus an
    ``InstanceIdx`` taken from the header. Sections without ``NoteNum`` are
    dropped, but their tempo still carries over to later notes.

    Returns:
        list of dicts, one per note, in file order.
    """
    song = []
    instance = {}
    bpm = 0.0
    # The context manager closes the file even if parsing fails.
    with open(file_name, "r") as ust_file:
        for strm in ust_file:
            line = strm.strip()
            if line.startswith("["):
                bpm = _collect_ust_section(song, instance, bpm)
                instance = {"InstanceIdx": line.lstrip("[#").rstrip("]")}
                continue
            # Split on the first "=" only, so values containing "=" no
            # longer break the key/value unpacking.
            key, sep, value = line.partition("=")
            if not sep:
                continue
            if key == "Lyric":
                value = jaconv.hira2kata(unicode(value)).encode("utf8")
            if key in ("NoteNum", "Lyric", "Length", "Tempo"):
                instance[key] = value
    # Flush the last section, which has no following header to trigger it.
    _collect_ust_section(song, instance, bpm)
    return song
Example #2
0
    def reading_form(cls, text):
        """Yield a full-width katakana reading for each token of *text*.

        Tokens whose first part-of-speech element is "空白" or "補助記号"
        are skipped.
        """
        skipped_pos = {"空白", "補助記号"}
        for token in cls.sudachi.tokenize(text, cls._mode):
            if token.part_of_speech()[0] in skipped_pos:
                continue

            # When no reading exists, fall back to the surface form and let
            # romkan try to turn it into katakana (e.g. "tarou"). A reading
            # can also come back as alphanumerics instead of katakana
            # (e.g. "shi"), so it goes through romkan as well.
            reading = token.reading_form()
            source = reading if reading else token.surface()
            kana = romkan.to_katakana(source)

            # Convert half-width katakana to full-width. When the dictionary
            # has no reading, half-width katakana can be mixed in, and
            # romkan.to_hepburn then no longer reacts to it.
            yield jaconv.h2z(kana, ignore="", kana=True, ascii=False, digit=False)
Example #3
0
def search(jisho, all_mode, rounds, verbose):
    """Dictionary mode: look up each command-line argument in *jisho*.

    ``all_mode`` shows everything, a positive ``rounds`` starts the game,
    otherwise every term in ``sys.argv[1:]`` is matched as a key of
    ``jisho``, or against each entry's meaning, kunyomi (hiragana form)
    and onyomi (katakana form), in that order.
    """
    if all_mode:
        show_all(jisho)
        return
    if rounds > 0:
        game(jisho, rounds)
        return

    for term in sys.argv[1:]:
        if term in jisho:
            jisho[term]()
            continue

        as_hiragana = romkan.to_hiragana(term)
        as_katakana = romkan.to_katakana(term)
        for kanji in jisho:
            entry = jisho[kanji]
            # Only the first matching field of an entry is reported.
            if term in entry.meaning:
                hit = ("=", term)
            elif as_hiragana in entry.kunyomi:
                hit = ("->", as_hiragana)
            elif as_katakana in entry.onyomi:
                hit = ("~>", as_katakana)
            else:
                continue

            if verbose is False:
                print(kanji, *hit)
            else:
                entry()
Example #4
0
def subvert(expr, t):
    """Expand macron vowels in *expr* to doubled romaji and convert to kana.

    ``ō``/``ô`` become ``ou``, ``ū`` becomes ``uu``, ``ē`` becomes ``ee``
    and ``ā`` becomes ``aa``. The result is converted with
    ``romkan.to_katakana`` when ``t == KATA`` and with
    ``romkan.to_hiragana`` otherwise.
    """
    import romkan
    # flags must be passed by keyword: the fourth positional parameter of
    # re.sub is `count`, so passing re.UNICODE there silently limited each
    # substitution to the first 32 matches.
    expr = re.sub(u'\u014D|\u00F4', 'ou', expr, flags=re.UNICODE)
    expr = re.sub(u'\u016B', 'uu', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0113', 'ee', expr, flags=re.UNICODE)
    expr = re.sub(u'\u0101', 'aa', expr, flags=re.UNICODE)
    if t == KATA:
        return romkan.to_katakana(expr)
    return romkan.to_hiragana(expr)
    def calculatePartsKana(self, aString):
        """Return the serialized set of parts computed from the katakana form of *aString*.

        *aString* is converted to romaji and then to katakana with romkan
        before being passed to ``calculatePartsElement``.
        """
        katakana = romkan.to_katakana(romkan.to_roma(aString))

        parts = set()
        # NOTE(review): the loop runs once per space-separated piece of
        # aString but passes the whole katakana string every time; the
        # piece itself is never used. Confirm whether that is intended.
        for _ in aString.split(" "):
            parts |= self.calculatePartsElement(katakana)

        return self.serializeSet(parts)
Example #6
0
def converter():
    """Refill the hiragana and katakana entry widgets from the romaji entry."""
    romaji_text = ent_romaji.get()

    targets = (
        (ent_hiraga, romkan.to_hiragana),
        (ent_kataka, romkan.to_katakana),
    )
    for entry, to_kana in targets:
        # Replace the widget's whole content with the converted text.
        entry.delete(0, tk.END)
        entry.insert(0, to_kana(romaji_text))
Example #7
0
def romaji_to_kana(request):
    """
    Return the hiragana and katakana conversions of the ``word`` query parameter.

    A missing parameter is treated as an empty string, and only the first
    1000 characters of the value are converted.
    """
    romaji = request.query_params.get('word', '')
    romaji = romaji[:1000]
    payload = {
        'hiragana': romkan.to_hiragana(romaji),
        'katakana': romkan.to_katakana(romaji),
    }
    return Response(payload, status=status.HTTP_200_OK)
Example #8
0
 def kana(self, msg, args):
     """Converts Romazi to kana"""
     # Returned when no word is given, or when the word is neither fully
     # upper-case nor fully lower-case.
     fallback = "Am I supposed to guess the word you want?..."

     count = len(args)
     word = None
     if count == 1:
         word = args[0]
     elif count > 1:
         word = " ".join(args)

     if word is None:
         return fallback
     # Upper-case input selects katakana, lower-case input selects hiragana.
     if word.isupper():
         return romkan.to_katakana(word)
     if word.islower():
         return romkan.to_hiragana(word)
     return fallback
Example #9
0
 def lookup(self, romaji, channel, use_romkan=False):
   '''
   Start a romaji-to-katakana conversion and report it through callbacks.

   With use_romkan=True the conversion is done locally by romkan and
   self.callback(romaji, channel, katakana) is called directly.
   Otherwise the page at www.sljfaq.org is requested and the
   GetKatakana.HTMLResponse / GetKatakana.HTMLError handlers are attached
   to the result.
   '''
   if not use_romkan:
     # Network lookup of romaji to katakana.
     url = 'http://www.sljfaq.org/cgi/e2k.cgi?word={romaji}'.format(romaji=romaji)
     pending = getPage(url)
     pending.addCallbacks(
       callback = GetKatakana.HTMLResponse(self.callback, romaji, channel),
       errback = GetKatakana.HTMLError(self.error_callback))
     return

   converted = romkan.to_katakana(romaji)
   self.callback(romaji, channel, converted)
Example #10
0
 async def romkan(self, ctx, *, text: commands.clean_content):
     """Convert romaji into hiragana or katakana, or vice-versa."""
     # The conversion type comes either from a leading "hg "/"kk "/"ro "
     # prefix in the text or from the name the command was invoked with.
     if text[:3] in ("hg ", "kk ", "ro "):
         tp = text[:2]
         text = text[3:]
     else:
         tp = ctx.invoked_with
         if tp == "romkan":
             return await ctx.send(
                 "Please either use `!hg`, `!kk` or `!ro` (for hiragana, katakana and romaji respectively), or pass the type as an argument: `!romkan hg LyricLy wa baka desu yo`"
             )

     converters = {
         "hg": romkan.to_hiragana,
         "kk": romkan.to_katakana,
         "ro": romkan.to_hepburn,
     }
     convert = converters.get(tp)
     # Any other invocation name sends nothing.
     if convert is not None:
         await ctx.send(convert(text))
Example #11
0
 def command_ja(self, event):
     '''Usage: ~ja <k/h/r> <arg> displays katakana/hiragana/romaji for a given argument, converting between romaji and kana'''
     # event.params is expected to be "<dest> <phrase>"; a missing phrase
     # makes the tuple unpacking below raise ValueError.
     try:
         dest, phrase = event.params.split(' ', 1)
         dest = dest.lower()
         if dest == 'k':
             resp = romkan.to_katakana(phrase)
         elif dest == 'h':
             resp = romkan.to_hiragana(phrase)
         elif dest == 'r':
             resp = romkan.to_roma(phrase.decode('utf-8'))
         else:
             # A bare `raise` with no active exception only fails by
             # accident; raise an explicit, descriptive error instead.
             raise ValueError('unknown conversion target: %r' % (dest,))
         self.send_message(event.respond, resp)
     except Exception:
         # Tell the user, then re-raise so the caller still sees the error.
         # `except Exception` leaves KeyboardInterrupt/SystemExit alone.
         self.send_message(event.respond, 'Invalid input, please check syntax.')
         raise
Example #12
0
 def command_ja(self, event):
     '''Usage: ~ja <k/h/r> <arg> displays katakana/hiragana/romaji for a given argument, converting between romaji and kana'''
     # event.params is expected to be "<dest> <phrase>"; a missing phrase
     # makes the tuple unpacking below raise ValueError.
     try:
         dest, phrase = event.params.split(' ', 1)
         dest = dest.lower()
         if dest == 'k':
             resp = romkan.to_katakana(phrase)
         elif dest == 'h':
             resp = romkan.to_hiragana(phrase)
         elif dest == 'r':
             resp = romkan.to_roma(phrase.decode('utf-8'))
         else:
             # A bare `raise` with no active exception only fails by
             # accident; raise an explicit, descriptive error instead.
             raise ValueError('unknown conversion target: %r' % (dest,))
         self.send_message(event.respond, resp)
     except Exception:
         # Tell the user, then re-raise so the caller still sees the error.
         # `except Exception` leaves KeyboardInterrupt/SystemExit alone.
         self.send_message(event.respond, 'Invalid input, please check syntax.')
         raise
Example #13
0
def lookup(rad_dawg, dict_dawg, pattern):
    """Look up dictionary words matching *pattern*.

    Upper-case romaji runs in the pattern are converted to katakana and
    lower-case runs to hiragana. Bracketed tokens are resolved through
    ``rad_dawg.lookup_kanji``; any other token stands for itself. At most
    ``MAX_RESULTS`` results are returned.
    """
    # sanity: refuse long patterns with many bracket groups
    if len(pattern) > 40 and pattern.count('[') > 20:
        return []

    def _to_katakana(match):
        return romkan.to_katakana(match.group(0))

    def _to_hiragana(match):
        return romkan.to_hiragana(match.group(0))

    # romaji => kana (upper-case first, then lower-case)
    pattern = re.sub('[-A-Z]+', _to_katakana, pattern)
    pattern = re.sub('[-a-z]+', _to_hiragana, pattern)

    components = []
    for token in PATTERN_RE.findall(pattern):
        bracketed = token[0] == '[' and token[-1] == ']'
        if bracketed:
            components.append(rad_dawg.lookup_kanji(u''.join(token[1:-1])))
        else:
            components.append({token})

    matches = dict_dawg.lookup_word(components)
    return matches[:MAX_RESULTS]
def strings(input):
    """Convert *input* to several scripts and return them space-separated.

    Sets the module globals ``raw``, ``kunrei``, ``hiragana``, ``katakana``,
    ``hepburn`` and ``onoma`` as a side effect, and returns
    ``"<hiragana> <katakana> <hepburn> <onoma>"``.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Both kana forms are derived from "kunrei" because hiragana <> katakana
    # conversion doesn't work
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    # Substitutions applied to the kunrei form, in this order.
    changes_dict = {'ch': 'C', 'ty':'T', 'sy':'S', 'ny': 'N', 'zy':'Z',
                    'dj':'D', 'l':'r','xtu':'Q', 'aa':'a-','ee':'e-','ii':'i-',
                    'oo':'o-','uu':'u-'}
    onoma = kunrei
    for old, new in changes_dict.items():
        onoma = onoma.replace(old, new)
    # A trailing "tto" collapses to "Q".
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))
Example #15
0
def strings(input):
    """Convert *input* to several scripts and return them space-separated.

    Sets the module globals ``raw``, ``kunrei``, ``hiragana``, ``katakana``,
    ``hepburn`` and ``onoma`` as a side effect, and returns
    ``"<hiragana> <katakana> <hepburn> <onoma>"``.
    """
    global raw, kunrei, hiragana, katakana, hepburn, onoma

    raw = input
    kunrei = romkan.to_kunrei(input)
    # Both kana forms are derived from "kunrei" because hiragana <> katakana
    # conversion doesn't work
    hiragana = romkan.to_hiragana(kunrei)
    katakana = romkan.to_katakana(kunrei)
    hepburn = romkan.to_hepburn(hiragana)

    # Substitutions applied to the kunrei form, in this order.
    changes_dict = {'ch': 'C', 'ty':'T', 'sy':'S', 'ny': 'N', 'zy':'Z',
                    'dj':'D', 'l':'r','xtu':'Q', 'aa':'a-','ee':'e-','ii':'i-',
                    'oo':'o-','uu':'u-'}
    onoma = kunrei
    for old, new in changes_dict.items():
        onoma = onoma.replace(old, new)
    # A trailing "tto" collapses to "Q".
    if onoma.endswith('tto'):
        onoma = onoma[:-3] + 'Q'

    return ' '.join((hiragana, katakana, hepburn, onoma))
Example #16
0
def game(jisho, rounds):
    """Plays the random kanji game!

    Asks ``rounds`` questions on stdin. Each question picks a random kanji
    from ``jisho`` and asks for its meaning, one of its kunyomi (answer is
    converted to hiragana) or one of its onyomi (answer is converted to
    katakana). Prints the score at the end; with ``rounds == 0`` the score
    line reports 0.0%.
    """
    correct = 0
    # The key list does not change during the game; build it once.
    kanji_pool = list(jisho.keys())
    count = rounds
    while count > 0:
        count -= 1
        kanji = random.choice(kanji_pool)
        quiz = random.randint(0, 2)

        if quiz == 0:
            answer = input("What does " + kanji + " mean? ")
            if answer in jisho[kanji].meaning:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!", kanji, "=", jisho[kanji].meaning)

        elif quiz == 1:
            answer = input("Type in one of the kunyomi of " + kanji + " : ")
            if romkan.to_hiragana(answer) in jisho[kanji].kunyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
            print(kanji, "=", jisho[kanji].kunyomi)

        elif quiz == 2:
            answer = input("Type in one of the onyomi of " + kanji + " : ")
            if romkan.to_katakana(answer) in jisho[kanji].onyomi:
                print("Correct! :D")
                correct += 1
            else:
                print("Incorrect!")
            print(kanji, "=", jisho[kanji].onyomi)

    # Guard the percentage: rounds == 0 used to raise ZeroDivisionError.
    percent = 100*correct/rounds if rounds else 0.0
    print(">> You got " + str(correct) + " out of " + str(rounds)
          + " (" + str(percent) + "%)!\n")
Example #17
0
def _finalize_timed_section(instance, bpm, currentBeat):
    """Fill in tempo, timing and lyric fields of one UST section in place.

    ``currentBeat`` accumulates note lengths per tempo and is updated with
    this section's ``Length``. Returns the tempo in effect after the
    section.
    """
    if instance.get("Tempo", None):
        # Tempo may be written with a decimal comma ("120,5").
        bpm = float(".".join(instance["Tempo"].split(",")))
    if instance.get("Length", None):
        # Start time and beat are taken before this note's own length is
        # added to the accumulator.
        currentTime = get_beat2time(currentBeat)
        instance["StartTime"] = currentTime
        instance["Beat"] = get_beat2rhythm(currentBeat)
        currentBeat[bpm] += float(instance["Length"])
        m, s = divmod(currentTime, 60)
        h, m = divmod(m, 60)
        instance["StartTimeReadable"] = "%d/%d/%s" % (h, m, s)
        # NOTE: raises ZeroDivisionError when a Length appears before any
        # Tempo has been seen, because bpm starts at 0.0.
        instance["Duration"] = str(
            60.0 / bpm * (float(instance["Length"]) / 480.0))
    if instance.get("Lyric", None):
        lyric = instance["Lyric"]
        # Keep only the last space-separated token of the lyric.
        if len(lyric.split(" ")) > 1:
            lyric = lyric.split(" ")[-1]
        # Lyrics containing "R" or "息" are blanked.
        if "R" in lyric or "息" in lyric:
            lyric = ""
        # hira_p is defined elsewhere in this file.
        if hira_p.search(unicode(lyric)):
            lyric = re.sub("[A-Za-z]+", "", lyric)
        else:
            lyric = romkan.to_katakana(lyric)
        # Drop every character outside the katakana range ァ-ン.
        instance["Lyric"] = re.sub(u"[^ァ-ン]", "",
                                   unicode(lyric)).encode("utf8")
    instance["Tempo"] = str(bpm)
    return bpm


def open_ust(file_name):
    """Parse the UST file ``file_name`` into a list of section dicts.

    Each ``[...]`` header starts a new section; every ``key=value`` line
    inside it is stored. Sections are returned in file order with
    ``InstanceIdx`` and ``Tempo`` always set, and with ``StartTime``,
    ``Beat``, ``StartTimeReadable`` and ``Duration`` added when the
    section has a ``Length``.
    """
    song = []
    instance = {}
    currentBeat = defaultdict(float)
    bpm = 0.0
    # The context manager closes the file even if parsing fails.
    with open(file_name, "r") as ust_file:
        for strm in ust_file:
            line = strm.strip()
            if line.startswith("["):
                if instance:
                    bpm = _finalize_timed_section(instance, bpm, currentBeat)
                    song.append(instance)
                instance = {"InstanceIdx": line.lstrip("[#").rstrip("]")}
                continue
            # Split on the first "=" only, so values containing "=" no
            # longer break the key/value unpacking.
            key, sep, value = line.partition("=")
            if not sep:
                continue
            if key == "Lyric":
                value = jaconv.hira2kata(unicode(value)).encode("utf8")
            instance[key] = value

    # Flush the last section, which has no following header to trigger it.
    if instance:
        bpm = _finalize_timed_section(instance, bpm, currentBeat)
        song.append(instance)
    return song
Example #18
0
def generate_search_conditions(args):
    '''Build the list of SearchConditions to try for *args*.

    args = command-line argument dict (argparse object). One condition is
    produced for every kept combination of regexp flag, field, extent and
    query variant, in that nesting order.
    '''

    def fix_hyphen(s):
        # romkan will convert ASCII hyphen-minus to CJKV long 'ー'; convert
        # it back in start position, to preserve FTS operator '-'.
        if len(s) > 1 and s[0] == 'ー':
            return '-' + s[1:]
        return s

    if args.regexp:
        regexp_flags = (True,)
    elif tt.has_regexp_special(args.query_s):
        regexp_flags = (False, True)
    else:
        regexp_flags = (False,)

    if args.field != 'auto':
        fields = (args.field,)
    elif tt.is_kana(args.query_s):
        fields = ('kanji', 'reading')
    else:
        fields = ('kanji', 'reading', 'gloss')

    if args.extent != 'auto':
        extents = (args.extent,)
    else:
        extents = ('whole', 'word', 'beginning', 'partial')

    conditions = []

    for regexp in regexp_flags:
        for field in fields:
            kana_field = field in ('kanji', 'reading')
            for extent in extents:

                # auto guesses never try the 'beginning' extent on glosses
                if field == 'gloss' and extent == 'beginning' and args.extent == 'auto':
                    continue

                if kana_field and extent == 'word':
                    if args.extent == 'auto':
                        # useless combination generated, skip
                        continue
                    # useless combination requested, adjust
                    extent = 'whole'

                if field == 'reading' and tt.is_latin(args.query_s):
                    # 'reading' field auto-convert romaji to kana. as of this
                    # writing, JMdict has no romaji in readingfields.
                    queries = [
                        [fix_hyphen(to_kana(s)) for s in args.query]
                        for to_kana in (romkan.to_hiragana, romkan.to_katakana)
                    ]
                else:
                    queries = (args.query,)
                # TODO: add wide-char

                for query in queries:
                    conditions.append(SearchConditions(args, query, regexp, field, extent))

    return conditions
Example #19
0
                (current[1] + 1):i]
            correct += " " + roma[current[0]:(current[1] + 1)] + " " + roma[
                (current[1] + 1):i]
            current = [i, i]
    if current is not None:
        result += "[" + answer[current[0]:(current[1] + 1)] + "]" + answer[
            (current[1] + 1):]
        correct += " " + roma[current[0]:(current[1] +
                                          1)] + " " + roma[(current[1] + 1):]
    return result, correct


# Endless quiz loop: pick a random word, show it (as katakana when
# mode == 2) and ask for its romaji until it is right or max_tries is hit.
while True:
    # random.randint includes its upper bound, so randint(0, len(words))
    # could yield len(words) and make words[i] raise IndexError; randrange
    # excludes the upper bound.
    i = random.randrange(len(words))
    to_write = words[i]
    roma = romkan.to_roma(to_write).strip()
    if mode == 2:
        to_write = romkan.to_katakana(roma) + "\n"
    tries = 0
    while tries < max_tries:
        answer = input(to_write + "> ").strip()
        if answer == roma:
            print("\tcorrect!")
            break
        else:
            print("\tWRONG!")
            tries += 1
            # After the last allowed try, show the answer and the mistakes.
            if tries == max_tries:
                errors, correct = find_error(answer, roma)
                print("\tAnswer was " + correct + "\n\tYou wrote: " + errors)
Example #20
0
def to_katakana(romaji: str) -> str:
    """Return *romaji* converted by ``romkan.to_katakana``."""
    katakana = romkan.to_katakana(romaji)
    return katakana
Example #21
0
 def _to_katakana(self):
     """Return ``self.word`` converted by ``romkan.to_katakana``."""
     word = self.word
     return romkan.to_katakana(word)
Example #22
0
	def render(self):
		"""Redraw the result list in self.win.

		For each entry in self.results two strings are drawn: one listing
		its kanji and reading elements and one listing its glosses.
		Occurrences of self.query, or of its romkan hiragana/katakana
		conversions, are drawn with a second (highlight) color pair.
		Drawing stops at the first curses.error.
		"""
		super(SearchResults,self).clear();

		# Only w is used below, to pad each drawn string with spaces.
		(h,w) = self.win.getmaxyx();
		for i,entry in enumerate(self.results):

			# k_ele / r_ele may be missing, a single item or a list;
			# normalize both to a list.
			kele = entry.get("k_ele");
			kele = ([kele] if not isinstance(kele,list) else kele) if kele is not None else [];
			rele = entry.get("r_ele");
			rele = ([rele] if not isinstance(rele,list) else rele) if rele is not None else [];

			index = 0; #re_restr kanji index

			#create kanji->reading dictionary for read restrictions
			# Each distinct re_restr value gets the next index, in
			# first-seen order; readings without re_restr are skipped.
			kett = {};
			for r in rele:
				try:
					for restr in loopOn(r["re_restr"]):
						if kett.get(restr) is None:
							kett[restr] = index;
							index += 1;
				except KeyError:
					continue;

			#construct the lines for the list view
			# fln: comma-separated kanji then readings, each followed by
			# "[n]" markers for the restriction indices it is tied to.
			fln = "";
			for k in kele:
				if len(fln) > 0:
					fln += ", ";
				fln += k["keb"];
				try:
					fln += "["+str(kett[k["keb"]])+"]";
				except KeyError:
					pass;
					
			for r in rele:
				if len(fln) > 0:
					fln += ", ";
				fln += r["reb"];
				try:
					for restr in loopOn(r["re_restr"]):
						fln += "["+str(kett[restr])+"]";
				except KeyError:
					pass;

			sense = entry["sense"];

			# gln: every gloss text of every sense, comma-separated.
			gln = "";
			for s in loopOn(sense):
				for g in loopOn(s["gloss"]):
					if len(gln) > 0:
						gln += ", ";
					gln += g["#text"];

			# Append a heart marker for each tag whose list contains this
			# entry's ent_seq.
			tagged = False;
			for te in self.jisho.tagdict:
				if entry["ent_seq"] in self.jisho.tagdict[te]:
					fln += " ["+u"\u2764 "+te+"]";
					tagged = True;

			# Query variants to highlight, each paired with its length.
			# self.kr.sub("", ...) removes whatever self.kr matches from
			# the converted text (presumably leftover non-kana; confirm).
			hiragana = romkan.to_hiragana(self.query);
			hiragana = self.kr.sub("",hiragana);
			katakana = romkan.to_katakana(self.query);
			katakana = self.kr.sub("",katakana);
			qs = [(self.query,len(self.query)),(hiragana,len(hiragana)),(katakana,len(katakana))];

			for s in [fln,gln]:
				try:
					#highlight the query
					# c = (normal pair, highlight pair): (1,4) for the
					# selected row; otherwise (20,21) when tagged, else a
					# pair alternating with the row's parity.
					c = [(1,4),
						[(2+i%2,5+i%2),(20,21)][tagged]
					][self.sel != i];
					q = 0;
					# Runs only when the raw query is non-empty. Each pass
					# finds the earliest match of any variant at or after
					# q, draws the text before it normally and the match
					# highlighted.
					while qs[0][1] > 0:
						Q = -1;
						l = +0;
						for qe in qs:
							if qe[1] == 0:
								continue;
							Q1 = s.find(qe[0],q);
							l1 = qe[1];
							if Q1 != -1 and (Q1 < Q or Q == -1):
								Q = Q1;
								l = l1;
						if Q == -1:
							break;
						self.win.addstr(s[q:Q],curses.color_pair(c[0]));
						self.win.addstr(s[Q:Q+l],curses.color_pair(c[1]));
						q = Q+l;
					self.win.addstr(s[q:],curses.color_pair(c[0]));
					#self.win.addstr(s,curses.color_pair(c[0]));
					# Pad from the cursor column with w-x spaces in the
					# normal color.
					(_,x) = self.win.getyx();
					self.win.addstr(' '*(w-x),curses.color_pair(c[0]));

				except curses.error:
					break;
			else:
				# Both strings were drawn without error; go to next entry.
				continue;
			# Reached only when a curses.error broke the inner loop: stop
			# drawing further entries.
			break;

		self.win.refresh();
Example #23
0
 def process(cls, v: str):
     """Convert *v* with ``romkan.to_hepburn`` and then with ``romkan.to_katakana``."""
     return romkan.to_katakana(romkan.to_hepburn(v))
Example #24
0
            correct = roma[:i]
        elif i - 1 == current[1]:
            current[1] = i
        else:
            result += "[" + answer[current[0]:(current[1] + 1)] + "]" + answer[(current[1] + 1):i]
            correct += " " + roma[current[0]:(current[1] + 1)] + " " + roma[(current[1] + 1):i]
            current = [i, i]
    if current is not None:
        result += "["+answer[current[0]:(current[1]+1)]+"]"+answer[(current[1]+1):]
        correct += " "+roma[current[0]:(current[1]+1)]+" " + roma[(current[1]+1):]
    return result, correct

# Endless quiz loop: pick a random word, show it (as katakana when
# mode == 2) and ask for its romaji until it is right or max_tries is hit.
while True:
    # random.randint includes its upper bound, so randint(0, len(words))
    # could yield len(words) and make words[i] raise IndexError; randrange
    # excludes the upper bound.
    i = random.randrange(len(words))
    to_write = words[i]
    roma = romkan.to_roma(to_write).strip()
    if mode == 2:
        to_write = romkan.to_katakana(roma)+"\n"
    tries = 0
    while tries < max_tries:
        answer = input(to_write+"> ").strip()
        if answer == roma:
            print("\tcorrect!")
            break
        else:
            print("\tWRONG!")
            tries += 1
            # After the last allowed try, show the answer and the mistakes.
            if tries == max_tries:
                errors, correct = find_error(answer, roma)
                print("\tAnswer was "+correct+"\n\tYou wrote: "+errors)
Example #25
0
File: main.py Project: xmcp/kanana
 def parse_single(txt):
     """Convert *txt* to katakana when it is all upper-case, otherwise to hiragana."""
     convert = romkan.to_katakana if txt.isupper() else romkan.to_hiragana
     return convert(txt)
Example #26
0
 def _to_katakana(self):
     """Return ``self.word`` converted by ``romkan.to_katakana``, encoded as UTF-8."""
     katakana = romkan.to_katakana(self.word)
     return katakana.encode("utf-8")
Example #27
0
    elif i == 57:
        char = 'JI'
        alt = '"DI"'
    elif i == 58:
        char = 'ZU'
        alt = '"DU"'

    if alt is None:
        alt = 'null'

    fixture = '{ "model": "kana.character", "pk": %(pk)s, "fields": { "romaji": "%(romaji)s", ' \
              '"hiragana": "%(hiragana)s", "katakana": "%(katakana)s", "is_plain": %(is_plain)s,' \
              '"is_dakuten": %(is_dakuten)s, "is_handakuten": %(is_handakuten)s, "is_youon": %(is_youon)s,' \
              '"gojuon_row": %(gojuon_row)s, "gojuon_col": %(gojuon_col)s, "alternate_romaji": %(alt)s, ' \
              '"notes": null } },'
    args = {
        'pk': i,
        'romaji': char,
        'hiragana': romkan.to_hiragana(char),
        'katakana': romkan.to_katakana(char),
        'is_plain': str(is_plain).lower(),
        'is_dakuten': str(is_dakuten).lower(),
        'is_handakuten': str(is_handakuten).lower(),
        'is_youon': str(is_youon).lower(),
        'gojuon_row': str(gojuon_row),
        'gojuon_col': str(gojuon_col),
        'alt': alt
    }

    print(fixture % args)
 def __convertToKatakana(self, match):
     """Return the replacement text for a regex *match*.

     A match that is exactly "N" is returned unchanged; anything else is
     passed through ``to_katakana`` and upper-cased.
     """
     matched = match.group()
     if matched != "N":
         return to_katakana(matched).upper()
     return "N"
Example #29
0
def onromaji_to_katakana(reading):
    """Return *reading* unchanged.

    NOTE(review): the original body had a second, unreachable statement
    after the unconditional return:
    ``return romkan.to_katakana(reading).replace(':', 'ウ')``.
    It never ran, so it has been removed; restore it in place of the
    plain return if the katakana conversion is wanted again.
    """
    return reading
Example #30
0
def generate_search_conditions(args):
    '''Build the list of SearchConditions to try for one query.

    args = command-line argument dict (argparse object)

    One SearchConditions is created per (regexp flag, field, extent, query)
    combination that survives the filtering below, in loop order: regexp
    flag outermost, then field, then extent, then query variant.
    '''

    def restore_leading_hyphen(term):
        # romkan will convert ASCII hyphen-minus to CJKV long 'ー'; put the
        # hyphen back in start position to preserve the FTS operator '-'.
        if len(term) > 1 and term[0] == 'ー':
            return '-' + term[1:]
        return term

    # Regexp flags: forced on, both off-then-on when the query contains
    # regexp special characters, or off only.
    if args.regexp:
        regexp_flags = (True,)
    else:
        regexp_flags = ((False, True) if tt.has_regexp_special(args.query_s)
                        else (False,))

    # Fields: an explicit choice wins; in auto mode a kana query skips glosses.
    if args.field == 'auto':
        fields = (('kanji', 'reading') if tt.is_kana(args.query_s)
                  else ('kanji', 'reading', 'gloss'))
    else:
        fields = (args.field,)

    # Extents: an explicit choice wins; auto mode tries all of them in order.
    auto_extent = args.extent == 'auto'
    if auto_extent:
        extents = ('whole', 'word', 'beginning', 'partial')
    else:
        extents = (args.extent,)

    conditions = []

    for regexp in regexp_flags:
        for field in fields:
            kanji_or_reading = field in ('kanji', 'reading')

            for requested_extent in extents:
                extent = requested_extent

                if auto_extent:
                    # Auto guesses never use 'beginning' on glosses (the
                    # original note cites a search for 'man' that should
                    # not bring up 'manatee').
                    if field == 'gloss' and extent == 'beginning':
                        continue
                    # useless combination generated, skip
                    if kanji_or_reading and extent == 'word':
                        continue
                elif kanji_or_reading and extent == 'word':
                    # useless combination requested, adjust
                    extent = 'whole'

                if field == 'reading' and tt.is_latin(args.query_s):
                    # 'reading' field: auto-convert romaji to kana, once as
                    # hiragana and once as katakana. As of the original
                    # writing, JMdict has no romaji in reading fields.
                    queries = [
                        [restore_leading_hyphen(to_kana(term))
                         for term in args.query]
                        for to_kana in (romkan.to_hiragana, romkan.to_katakana)
                    ]
                else:
                    queries = (args.query,)
                # TODO: add wide-char

                conditions.extend(
                    SearchConditions(args, query, regexp, field, extent)
                    for query in queries)

    return conditions
Example #31
0
    def multiscrape(self, name, shy=False):
        """Scrape the page section labelled ``name`` into ``self.config``.

        The section is located with an XPath query on ``self.root``: the
        ``h3`` elements for the name / furigana entries, otherwise the ``li``
        items or ``p`` elements that follow a ``strong`` / ``h4`` whose text
        contains ``name``. The text fragments of the first matching node are
        trimmed, joined with newlines, passed through ``makeHankaku`` and
        stored under a key chosen by ``name`` (in most branches
        ``self.config[name]['ja']``).

        NOTE(review): the whitespace character classes and the first
        ``replace`` in the furigana branch looked like they contained a
        duplicated plain space; they are assumed to have held an ideographic
        space (U+3000), which is now spelled as an explicit escape -- confirm
        against the original file.

        :param name: section label to search for; also selects the branch
            that decides where the value is stored.
        :param shy: when true, return immediately if
            ``self.config[name]['ja']`` is already set, and skip the
            furigana-specific branch.
        :returns: ``None``; all results are written to ``self.config``.
        """
        if shy and self.config.has_key(name) and self.config[name]['ja']:
            return
        if name != u'名前' and name != u'ふりがな':
            nodes = self.root.xpath("//_:li/_:strong[contains(text(), '%s')]/following-sibling::_:ul/_:li|//_:h4[contains(text(), '%s')]/following-sibling::_:p" % (name, name), namespaces=NS)
        else:
            nodes = self.root.xpath("//_:h3", namespaces=NS)
        if not nodes:
            return

        # Gather the non-empty, trimmed text fragments of the first node.
        # A plain for loop replaces the old iterator.next() / bare except
        # pair, which hid every error and not just the end of iteration.
        fragments = []
        for val in nodes[0].itertext():
            val = re.sub(u'^[ \u3000\r\n]+', '', val)
            val = re.sub(u'[ \u3000\r\n]+$', '', val)
            if val:
                fragments.append(val)

        val = re.sub(u'^[ \u3000\n]*(.*?)[ \u3000\n]*$', '\\1', '\n'.join(fragments))

        val = val.strip()
        val = makeHankaku(val)
        lines = val.split('\n')

        if name == u'名前':
            if not self.config.has_key(name):
                self.config[name] = {}
            self.config[name]['ja'] = lines[0]
        elif name == u'ふりがな' and not shy:
            if not self.config.has_key(u'名前'):
                self.config[u'名前'] = {}
            if len(lines) > 1:
                # The second line is used as the reading; with all spaces
                # removed it also feeds the sort keys.
                suzure = lines[1].replace(u'\u3000', '').replace(' ', '')
                self.config[u'名前']['kana'] = lines[1]
                self.config[u'名前']['en'] = titleCase(lines[1])
                # NOTE(review): assumes self.config already holds a
                # u'並べ替え' entry -- nothing here creates it; confirm.
                self.config[u'並べ替え']['ja'] = romkan.to_katakana(romkan.to_kunrei(suzure))
                self.config[u'並べ替え']['en'] = romkan.to_roma(suzure)
            else:
                self.config[u'名前']['kana'] = ''
        elif name == u'所属':
            if not self.config.has_key(u'所属'):
                self.config[u'所属'] = {}
            # Only these two staff types get a fixed affiliation; for any
            # other type the (possibly new, empty) entry is left as it is.
            if self.staffType == 'LS' or self.staffType == 'PRO':
                self.config[u'所属']['ja'] = u'法科大学院'
                self.config[u'所属']['en'] = 'Law School (professional course)'
        elif name == u'役職':
            if not self.config.has_key(u'役職'):
                self.config[u'役職'] = {}
            self.config[u'役職']['ja'] = getPostJapanese(val)
            self.config[u'役職']['en'] = getPostEnglish(self.config[u'役職']['ja'])

        elif name == u'所属学会':
            if not self.config.has_key(u'学会'):
                self.config[u'学会'] = {}
            # Several lines are stored as a list, a single line as a string.
            if len(lines) > 1:
                self.config[u'学会']['ja'] = lines
            else:
                self.config[u'学会']['ja'] = val
        elif name == u'教員からのメッセージ':
            if not self.config.has_key(u'法科大学院メッセージ'):
                self.config[u'法科大学院メッセージ'] = {}
            self.config[u'法科大学院メッセージ']['ja'] = lines
        elif name == u'リンク':
            # Store the text and href of the first link found in any node.
            # NOTE(review): assumes self.config already holds a
            # u'ホームページ' entry -- confirm.
            for node in nodes:
                subnode = node.xpath('.//_:a[@href]', namespaces=NS)
                if subnode:
                    self.config[u'ホームページ']['ja'] = subnode[0].text
                    self.config[u'ホームページ'][u'リンク'] = subnode[0].attrib['href']
                    break
        else:
            if not self.config.has_key(name):
                self.config[name] = {}
            # Several lines are stored as a list, a single line as a string;
            # the research field is additionally mapped through fieldsMap.
            if len(lines) > 1:
                self.config[name]['ja'] = lines
                if name == u'専門分野' and lines[0]:
                    self.config[name]['en'] = fieldsMap[lines[0]]
            else:
                self.config[name]['ja'] = val
                if name == u'専門分野' and val:
                    self.config[name]['en'] = fieldsMap[val]
Example #32
0
        alt = '"TI"'
    elif i == 17:
        char = 'TSU'
        alt = '"TU"'
    elif i == 52:
        char = 'JI'
        alt = '"ZI"'
    elif i == 57:
        # NOTE(review): pk 52 and pk 57 both use char 'JI', so both entries
        # get the same hiragana/katakana from romkan below -- confirm that
        # is intended for pk 57.
        char = 'JI'
        alt = '"DI"'
    elif i == 58:
        char = 'ZU'
        alt = '"DU"'

    # Alternates assigned above already carry their own double quotes because
    # the template below inserts %(alt)s unquoted; with no alternate the slot
    # is filled with the bare word null instead.
    if alt is None:
        alt = 'null'

    # Text template for one "kana.character" fixture entry, filled in with the
    # %-style named placeholders from ``args``. Each entry ends with a
    # trailing comma after its closing brace.
    fixture = '{ "model": "kana.character", "pk": %(pk)s, "fields": { "romaji": "%(romaji)s", ' \
              '"hiragana": "%(hiragana)s", "katakana": "%(katakana)s", "is_plain": %(is_plain)s,' \
              '"is_dakuten": %(is_dakuten)s, "is_handakuten": %(is_handakuten)s, "is_youon": %(is_youon)s,' \
              '"gojuon_row": %(gojuon_row)s, "gojuon_col": %(gojuon_col)s, "alternate_romaji": %(alt)s, ' \
              '"notes": null } },'
    # Both kana forms are derived from the romaji in ``char`` via romkan.
    # The is_* flags are inserted as lower-cased str() values
    # (presumably Python bools, giving true/false -- confirm).
    args = {
        'pk': i, 'romaji': char, 'hiragana': romkan.to_hiragana(char), 'katakana': romkan.to_katakana(char),
        'is_plain': str(is_plain).lower(), 'is_dakuten': str(is_dakuten).lower(),
        'is_handakuten': str(is_handakuten).lower(), 'is_youon': str(is_youon).lower(),
        'gojuon_row': str(gojuon_row), 'gojuon_col': str(gojuon_col), 'alt': alt
    }

    # Emit the finished entry on stdout.
    print(fixture % args)
Example #33
0
 def eval_force_romaji_to_kana_v1(self, text, kana_ref, nbest=20):
     """Score ``text`` after forcing its romaji into katakana.

     ``text`` is first converted to half-width form (digits and ASCII, not
     kana) with jaconv, then run through ``romkan.to_katakana``. If that
     changes anything, the converted string is scored with
     ``self.eval_normal``; otherwise a fixed sentinel is returned.
     """
     halfwidth = jaconv.z2h(text, digit=True, ascii=True, kana=False)
     katakana = romkan.to_katakana(halfwidth)  # convert whatever can be converted
     if katakana != halfwidth:
         return self.eval_normal(katakana, kana_ref, nbest)
     # Nothing changed, so skip the evaluation. The returned value is
     # arbitrary; it only has to be sufficiently large.
     return 12345
 def toKana(self, aString):
     """Return ``aString`` as katakana, going through romaji with romkan."""
     romaji = romkan.to_roma(aString)
     return romkan.to_katakana(romaji)
Example #35
0
 def execute(self, aTerm):
     """Run the raw dictionary query for ``aTerm``.

     When ``self.convertKana`` is set, the term is first converted to
     Hepburn romaji and then to katakana with romkan. The query text is
     ``query_base`` formatted with ``self.select``; its parameters are
     ``self.order`` followed by the (possibly converted) term.
     """
     term = romkan.to_katakana(romkan.to_hepburn(aTerm)) if self.convertKana else aTerm
     sql = query_base.format(self.select)
     return DictionaryDisplayElement.objects.raw(sql, [self.order, term])
Example #36
0
    def multiscrape(self, name, shy=False):
        """Scrape the page section labelled ``name`` into ``self.config``.

        The section is located with an XPath query on ``self.root``: the
        ``h3`` elements for the name / furigana entries, otherwise the ``p``
        elements that follow an ``h4`` whose text contains ``name``. The
        text fragments of the first matching node are trimmed, joined with
        newlines, passed through ``makeHankaku`` and stored under a key
        chosen by ``name`` (in most branches ``self.config[name]['ja']``).

        NOTE(review): the whitespace character classes and the first
        argument of the ``replace`` calls in the furigana branch looked like
        plain spaces; they are assumed to have held an ideographic space
        (U+3000), which is now spelled as an explicit escape -- confirm
        against the original file.

        :param name: section label to search for; also selects the branch
            that decides where the value is stored.
        :param shy: when true, return immediately if
            ``self.config[name]['ja']`` is already set, and skip the
            furigana-specific branch.
        :returns: ``None``; all results are written to ``self.config``.
        """
        if shy and self.config.has_key(name) and self.config[name]['ja']:
            return
        if name != u'名前' and name != u'ふりがな':
            nodes = self.root.xpath("//_:h4[contains(text(), '%s')]/following-sibling::_:p" % name, namespaces=NS)
        else:
            nodes = self.root.xpath("//_:h3", namespaces=NS)
        if not nodes:
            return

        # Gather the non-empty, trimmed text fragments of the first node.
        # A plain for loop replaces the old iterator.next() / bare except
        # pair, which hid every error and not just the end of iteration.
        fragments = []
        for val in nodes[0].itertext():
            val = re.sub(u'^[ \u3000\r\n]+', '', val)
            val = re.sub(u'[ \u3000\r\n]+$', '', val)
            if val:
                fragments.append(val)

        val = re.sub(u'^[ \u3000\n]*(.*?)[ \u3000\n]*$', '\\1', '\n'.join(fragments))

        val = val.strip()
        val = makeHankaku(val)
        lines = val.split('\n')

        if name == u'名前':
            if not self.config.has_key(name):
                self.config[name] = {}
            self.config[name]['ja'] = lines[0]
        elif name == u'ふりがな' and not shy:
            if not self.config.has_key(u'名前'):
                self.config[u'名前'] = {}
            if len(lines) > 1:
                # The second line is used as the reading; with all spaces
                # removed it also feeds the sort keys.
                suzure = lines[1].replace(u'\u3000', '').replace(' ', '')
                self.config[u'名前']['kana'] = lines[1]
                self.config[u'名前']['en'] = titleCase(romkan.to_hepburn(lines[1].replace(u'\u3000', ' ')))
                # NOTE(review): assumes self.config already holds a
                # u'並べ替え' entry -- nothing here creates it; confirm.
                self.config[u'並べ替え']['ja'] = romkan.to_katakana(romkan.to_kunrei(suzure))
                self.config[u'並べ替え']['en'] = romkan.to_roma(suzure)
            else:
                self.config[u'名前']['kana'] = ''
        elif name == u'教員からのメッセージ':
            # The existence check must use the same key that is written
            # below. It previously tested a misspelled key, so the entry
            # was replaced by an empty dict on every call.
            if not self.config.has_key(u'学部メッセージ'):
                self.config[u'学部メッセージ'] = {}
            self.config[u'学部メッセージ']['ja'] = lines
        elif name == u'役職':
            if not self.config.has_key(u'役職'):
                self.config[u'役職'] = {}
            self.config[u'役職']['ja'] = getPostJapanese(val)
            self.config[u'役職']['en'] = getPostEnglish(self.config[u'役職']['ja'])
        elif name == u'主要':
            # Several lines are stored as a list, a single line as a string.
            # NOTE(review): assumes self.config already holds a
            # u'主要業績' entry -- confirm.
            if len(lines) > 1:
                self.config[u'主要業績']['ja'] = lines
            else:
                self.config[u'主要業績']['ja'] = val
        else:
            if not self.config.has_key(name):
                self.config[name] = {}
            # Several lines are stored as a list, a single line as a string;
            # the research field is additionally mapped through fieldsMap.
            if len(lines) > 1:
                self.config[name]['ja'] = lines
                if name == u'専門分野' and lines[0]:
                    self.config[name]['en'] = fieldsMap[lines[0]]
            else:
                self.config[name]['ja'] = val
                if name == u'専門分野' and val:
                    self.config[name]['en'] = fieldsMap[val]
Example #37
0
 def to_katakana(self):
     self.lang_mode = 1
     for item in self.get_japanese_items():
         item.setText(romkan.to_katakana(romkan.to_roma(item.text())))