def get_song_meta(song):
    """Build the metadata dictionary for ``song`` in the request's language.

    Every entry of ``song.meta`` is copied into the result, looked up with
    ``(request.lc, "l")`` when ``request.latin`` is set and with
    ``request.lc`` otherwise.  Two extra keys are added:

    * ``"search"`` -- list of normalized search terms collected from the
      title, artist, seenon, album and genre fields.
    * ``"sort"`` -- the string used to order the song by title.

    Raises ``KeyError`` (or whatever the metadata values raise on lookup)
    if ``song.meta`` has no ``"title"`` entry.
    """
    d = {}
    for key, value in song.meta.items():
        if request.latin:
            d[key] = value[(request.lc, "l")]
        else:
            d[key] = value[request.lc]

    search = set()
    for key in ("title", "artist", "seenon", "album"):
        if key in song.meta:
            value = song.meta[key]
            search.add(normalize(value[request.lc]))
            search.add(normalize(value["k"]))
            search.add(normalize(value["l"]))
            # Also index a romanized form of the "k" variant, with the long
            # vowel mark stripped.
            search.add(
                normalize(jaconv.kana2alphabet(jaconv.kata2hira(
                    value["k"]))).replace("ー", ""))
    for key in ("genre", ):
        if key in song.meta:
            search.add(normalize(song.meta[key][request.lc]))
    d["search"] = list(search)

    title = song.meta["title"]
    if request.latin:
        sort_key = title[(request.lc, "l")]
        # An empty title has no first character to inspect (ord("") raises
        # TypeError), so treat it like a non-Latin title and fall through to
        # the romanization below.
        if not sort_key or ord(sort_key[0]) > 0x100:
            # Try again with kana-to-romaji, might help manufacture some
            # sensible sort order
            sort_key = jaconv.kana2alphabet(
                jaconv.kata2hira(title[(request.lc, "l", "k")]))
    else:
        sort_key = title[(request.lc, "k")]
    d["sort"] = sort_key
    return d
def get_song_meta(song):
    """Return the metadata dictionary for ``song`` in the request's language.

    Each ``song.meta`` entry is copied over, looked up with
    ``(request.lc, "l")`` when ``request.latin`` is set and with
    ``request.lc`` otherwise.  A ``"search"`` list of normalized search terms
    and a ``"sort"`` key taken from the title are added on top.
    """
    song_meta = song.meta

    result = {}
    for field, value in song_meta.items():
        lookup = (request.lc, "l") if request.latin else request.lc
        result[field] = value[lookup]

    terms = set()
    for field in ("title", "artist", "seenon", "album"):
        if field not in song_meta:
            continue
        value = song_meta[field]
        terms.add(normalize(value[request.lc]))
        terms.add(normalize(value["k"]))
        terms.add(normalize(value["l"]))
        # Romanized form of the "k" variant, without the long vowel mark.
        romanized = jaconv.kana2alphabet(jaconv.kata2hira(value["k"]))
        terms.add(normalize(romanized).replace("ー", ""))
    if "genre" in song_meta:
        terms.add(normalize(song_meta["genre"][request.lc]))
    result["search"] = list(terms)

    variant = "l" if request.latin else "k"
    result["sort"] = song_meta["title"][(request.lc, variant)]
    return result
def test_kana2alphabet():
    """Check ``jaconv.kana2alphabet`` against a table of hiragana inputs."""
    # (hiragana input, expected romanization), checked in this order.
    cases = (
        ('まみさん', 'mamisan'),
        ('はっとり', 'hattori'),
        ('はっ', 'haxtsu'),
        ('ぽっ', 'poxtsu'),
        ('ふぁふぃふぇふぉ', 'fafifefo'),
        ('っって', 'xtsutte'),
    )
    for kana, expected in cases:
        assert_equal(jaconv.kana2alphabet(kana), expected)
async def on_message(message):
    """Reply to the ``!hello``, ``!cotd``, ``!wotd`` and ``!help`` commands.

    Commands are matched case-insensitively against the start of the message
    text.  ``!potd`` is recognised but not implemented yet.  Messages that
    match no command are ignored.
    """
    # Lower-case a local copy instead of assigning back to message.content,
    # so anything else that receives this message object sees the text as it
    # was sent.
    content = message.content.lower()

    if content.startswith("!hello"):
        await message.channel.send("ビールとおっぱいがだいすきです!")
    elif content.startswith("!cotd"):
        rand_hiragana_char = random.choice(hiragana)
        translated_char = jaconv.kana2alphabet(rand_hiragana_char)
        # TODO Let's try to make this look nicer!
        await message.channel.send(
            f'Character of the day: {rand_hiragana_char}\nTranslation: {translated_char}'
        )
    elif content.startswith("!wotd"):
        # NOTE(review): 3035 is presumably the highest word id in the
        # database behind query_db -- confirm and consider deriving it.
        rand_num = random.randint(1, 3035)
        word = query_db(rand_num)
        await message.channel.send(
            f"Hiragana: {word.hiragana}\nKatakana: {word.katakana}\nKanji: {word.kanji}\nMeaning: {word.meaning}\n"
        )
    elif content.startswith("!potd"):
        # Phrase of the day is not implemented yet.
        pass
    elif content.startswith("!help"):
        e = discord.Embed(
            description=''' Commands ------------- ***!hello*** - Say hi to Gintoki Sensei ***!COTD*** (Character of the day!) - Get the character of the day ***!WOTD*** (Word of the day!) - Get the word of the day and link to the word pronounciation ***!POTD*** (Phrase of the day!) - Get the phrase of the day''',
            colour=discord.Colour.teal())
        await message.channel.send(embed=e)
def test_kana_to_hepburn():
    """Check that ``jaconv.kana2alphabet`` romanizes two sample names."""
    samples = (
        ('まみさん', 'mamisan'),
        ('はっとり', 'hattori'),
    )
    for hiragana_text, romaji in samples:
        assert_equal(jaconv.kana2alphabet(hiragana_text), romaji)
def _transliterate(token: str) -> str:
    """Romanize ``token``: katakana to hiragana first, then kana to alphabet."""
    return jaconv.kana2alphabet(jaconv.kata2hira(token))
def _aozora_work_year(row, year_rx):
    '''
    Returns the year to record for one catalogue row, as a string.

    The year is the lower of the years found in 底本初版発行年1 and 初出
    (or '' when neither field matches ``year_rx``).  If that year lies
    after the author's year of death taken from 没年月日, the death year
    prefixed with '<' is returned instead, as an upper bound.
    '''
    year = ''
    edition_match = year_rx.match(row['底本初版発行年1'])
    if edition_match:
        year = edition_match.group(1)

    first_appearance_match = year_rx.search(row['初出'])
    if first_appearance_match:
        alt_year = first_appearance_match.group(1)
        if year == '' or int(alt_year) < int(year):
            year = alt_year

    # Sanity check: a work cannot be dated after its author's death.  The
    # pattern has no capture groups, so the year is the whole match.
    death_match = re.search(r'\d{4}', row['没年月日'])
    if year and death_match and int(death_match.group(0)) < int(year):
        year = '<' + death_match.group(0)  # Specify upper bound as last resort.
    return year


def read_aozora_bunko_list(path: str, ndc_tr: Dict[str, str]) -> defaultdict:
    '''
    Reads in the list_person_all_extended_utf8.csv of Aozora Bunko and
    constructs a nested dictionary keyed on author and title. This is
    then used to identify the correct path to the file as well as give
    more metadata.

    ``path`` is the zip archive containing the CSV and ``ndc_tr`` maps
    NDC classification codes to their labels (a missing code raises
    ``KeyError``).  Each stored record has the keys author_ja, author,
    author_year, title_ja, title, year, ndc, file_path and file_name;
    ``year`` is '' when unknown and '<YYYY' when only an upper bound (the
    author's year of death) is known.
    '''
    d = defaultdict(dict)
    url_rx = re.compile(r'https://www\.aozora\.gr\.jp/cards/(\d+)/(.+)')
    year_rx = re.compile(r'(\d{4})(.+)年\s?(\d{1,2})月((\d{1,2})日)?')
    with ZipFile(path) as z:
        with z.open('list_person_all_extended_utf8.csv', 'r') as f:
            # The file is UTF-8; do not depend on the locale's default encoding.
            for row in csv.DictReader(TextIOWrapper(f, encoding='utf-8')):
                # Some works have versions in both new- and old-style
                # kana. Only rows marked as new-style (新字新仮名) are
                # processed; every other row is skipped.
                if row['文字遣い種別'] != '新字新仮名':
                    log.warning(f'Skipping processing of old-syle kana work: {row}')
                    continue

                # Use the lower value from 底本初版発行年1 and 初出, capped
                # by the author's year of death.
                year = _aozora_work_year(row, year_rx)

                author_ja = row['姓'] + row['名']
                author_en = row['名ローマ字'] + ' ' + row['姓ローマ字']

                title = row['作品名']
                title_ja = title
                title_en = jaconv.kana2alphabet(jaconv.kata2hira(row['作品名読み'])).title()
                subtitle = row['副題']
                if subtitle != '':
                    title_ja += ': ' + subtitle
                    title_en += ': ' + romanize(row['副題読み']).title()

                match = url_rx.match(row['XHTML/HTMLファイルURL'])
                if match is None:
                    log.debug('Missing XHTML/HTML file for record {}, skipping...'.format(row))
                    continue
                card_id = match.group(1)
                file_path = match.group(2)

                # Translate the NDC code(s) to labels; a 'K' in the raw
                # field marks the work as a children's book.
                ndc = row['分類番号'].replace('NDC ', '').replace('K', '')
                if len(ndc) > 3:
                    ndcs = ndc.split()
                    ndc = '/'.join(ndc_tr[n] for n in ndcs)
                elif not ndc:
                    ndc = ''
                else:
                    ndc = ndc_tr[ndc]
                if 'K' in row['分類番号']:
                    ndc += ' (児童書)'

                if title in d[author_ja]:
                    # Remove translations: a repeated author/title pair
                    # drops the entry stored earlier instead of adding
                    # this one.
                    d[author_ja].pop(title, None)
                    if not d[author_ja]:
                        d.pop(author_ja, None)
                else:
                    d[author_ja][title] = {
                        'author_ja': author_ja,
                        'author': author_en,
                        'author_year': f'{row["生年月日"]}--{row["没年月日"]}',
                        'title_ja': title_ja,
                        'title': title_en,
                        'year': year,
                        'ndc': ndc,
                        'file_path': 'aozorabunko/cards/{}/{}'.format(card_id, file_path),
                        'file_name': '{}_{}_{}'.format(  # TODO Do we need to shorten these?
                            row['姓ローマ字'],
                            row['名ローマ字'][0:1],
                            romanize(row['作品名読み'][0:7]).title()
                        )
                    }
    return d
def romanize(s: str) -> str:
    '''
    Romanizes ``s`` into a string made only of ASCII letters and single
    underscores.

    'ゔ' is replaced with 'v' up front, katakana is converted to hiragana
    and kana to alphabet via jaconv, and every character that is not an
    ASCII letter becomes an underscore, with runs of underscores collapsed
    into one.
    '''
    hiragana_text = jaconv.kata2hira(s.replace('ゔ', 'v'))
    latin_text = jaconv.kana2alphabet(hiragana_text)
    letters_only = re.sub(r'[^a-zA-Z]', '_', latin_text)
    return re.sub(r'_+', '_', letters_only)