Exemple #1
0
def get_song_meta(song):
    """Build the metadata dictionary for ``song`` for the current request.

    Every entry of ``song.meta`` is copied into the result, indexed by
    ``(request.lc, "l")`` when ``request.latin`` is set and by ``request.lc``
    otherwise. Two extra keys are added:

    * ``"search"``: list of normalized search terms collected from the
      title, artist, seenon, album and genre entries.
    * ``"sort"``: the title variant used for sorting.

    Args:
        song: object exposing a ``meta`` mapping; its values must support
            the index keys used below (presumably ``"k"`` is a kana reading
            and ``"l"`` a latin one -- TODO confirm against the meta type).

    Returns:
        dict: the resolved metadata plus the ``"search"`` and ``"sort"`` keys.
    """
    d = {}
    search = set()
    for k, v in song.meta.items():
        if request.latin:
            d[k] = v[(request.lc, "l")]
        else:
            d[k] = v[request.lc]
    for k in ("title", "artist", "seenon", "album"):
        if k in song.meta:
            v = song.meta[k]
            search.add(normalize(v[request.lc]))
            search.add(normalize(v["k"]))
            search.add(normalize(v["l"]))
            # Also index a romanized form of the "k" variant, with the
            # long-vowel mark stripped.
            search.add(
                normalize(jaconv.kana2alphabet(jaconv.kata2hira(
                    v["k"]))).replace("ー", ""))
    for k in ("genre", ):
        if k in song.meta:
            v = song.meta[k]
            search.add(normalize(v[request.lc]))
    d["search"] = list(search)
    if request.latin:
        sort_key = song.meta["title"][(request.lc, "l")]
        # ord() raises TypeError on an empty string, so only inspect the
        # first character when the title actually has one.
        if sort_key and ord(sort_key[0]) > 0x100:
            # Try again with kana-to-romaji, might help manufacture some sensible sort order
            sort_key = jaconv.kana2alphabet(
                jaconv.kata2hira(song.meta["title"][(request.lc, "l", "k")]))
        d["sort"] = sort_key
    else:
        d["sort"] = song.meta["title"][(request.lc, "k")]
    return d
Exemple #2
0
def get_song_meta(song):
    """Assemble the request-specific metadata dictionary for ``song``.

    Each ``song.meta`` entry is resolved with ``(request.lc, "l")`` when
    ``request.latin`` is set, else with ``request.lc``. The result also
    carries a ``"search"`` list of normalized terms and a ``"sort"`` value
    taken from the title.

    Args:
        song: object exposing a ``meta`` mapping whose values support the
            index keys used below.

    Returns:
        dict: resolved metadata plus the ``"search"`` and ``"sort"`` keys.
    """
    result = {
        field: (entry[(request.lc, "l")] if request.latin else entry[request.lc])
        for field, entry in song.meta.items()
    }

    terms = set()
    for field in ("title", "artist", "seenon", "album"):
        if field not in song.meta:
            continue
        entry = song.meta[field]
        terms.add(normalize(entry[request.lc]))
        terms.add(normalize(entry["k"]))
        terms.add(normalize(entry["l"]))
        # Romanized form of the "k" variant, with the long-vowel mark removed.
        romanized = jaconv.kana2alphabet(jaconv.kata2hira(entry["k"]))
        terms.add(normalize(romanized).replace("ー", ""))

    # Genre only contributes its request-language variant.
    if "genre" in song.meta:
        terms.add(normalize(song.meta["genre"][request.lc]))

    result["search"] = list(terms)

    variant = "l" if request.latin else "k"
    result["sort"] = song.meta["title"][(request.lc, variant)]
    return result
Exemple #3
0
def test_kana2alphabet():
    """Check ``jaconv.kana2alphabet`` against a table of expected outputs.

    The table includes a doubled consonant, a trailing small tsu, and the
    small-vowel combinations of ``ふ``.
    """
    cases = (
        ('まみさん', 'mamisan'),
        ('はっとり', 'hattori'),
        ('はっ', 'haxtsu'),
        ('ぽっ', 'poxtsu'),
        ('ふぁふぃふぇふぉ', 'fafifefo'),
        ('っって', 'xtsutte'),
    )
    for kana, expected in cases:
        assert_equal(jaconv.kana2alphabet(kana), expected)
Exemple #4
0
def test_kana2alphabet():
    """Verify the romanization produced by ``jaconv.kana2alphabet``.

    Each key of the mapping is a hiragana input and each value is the
    alphabet string the conversion is expected to return.
    """
    expected_by_kana = {
        'まみさん': 'mamisan',
        'はっとり': 'hattori',
        'はっ': 'haxtsu',
        'ぽっ': 'poxtsu',
        'ふぁふぃふぇふぉ': 'fafifefo',
        'っって': 'xtsutte',
    }
    for kana in expected_by_kana:
        romanized = jaconv.kana2alphabet(kana)
        assert_equal(romanized, expected_by_kana[kana])
Exemple #5
0
async def on_message(message):
    """Answer the bot's ``!`` commands found at the start of a message.

    The message content is lower-cased in place first, so commands match
    regardless of case. Handled prefixes: ``!hello``, ``!cotd``, ``!wotd``,
    ``!potd`` (currently does nothing) and ``!help``. Any other message is
    ignored.

    Args:
        message: incoming message; must expose a writable ``content`` string
            and a ``channel`` with an awaitable ``send``.
    """
    message.content = message.content.lower()
    command = message.content

    # The prefixes are mutually exclusive, so at most one branch runs.
    if command.startswith("!hello"):
        await message.channel.send("ビールとおっぱいがだいすきです!")

    elif command.startswith("!cotd"):
        # Pick a random entry of the module-level ``hiragana`` sequence.
        index = random.randint(0, len(hiragana) - 1)
        kana = hiragana[index]
        romaji = jaconv.kana2alphabet(kana)

        # TODO Let's try to make this look nicer!
        reply = f'Character of the day: {kana}\nTranslation: {romaji}'
        await message.channel.send(reply)

    elif command.startswith("!wotd"):
        # NOTE(review): 3035 is presumably the number of rows behind
        # query_db -- confirm against the database.
        entry = query_db(random.randint(1, 3035))
        reply = f"Hiragana: {entry.hiragana}\nKatakana: {entry.katakana}\nKanji: {entry.kanji}\nMeaning: {entry.meaning}\n"
        await message.channel.send(reply)

    elif command.startswith("!potd"):
        # Phrase of the day is not implemented yet.
        pass

    elif command.startswith("!help"):
        help_text = '''
        Commands
        -------------
        ***!hello*** - Say hi to Gintoki Sensei
        ***!COTD***   (Character of the day!) - Get the character of the day
        ***!WOTD***   (Word of the day!) - Get the word of the day and link to the word pronounciation
        ***!POTD***   (Phrase of the day!) - Get the phrase of the day'''
        embed = discord.Embed(description=help_text,
                              colour=discord.Colour.teal())
        await message.channel.send(embed=embed)
Exemple #6
0
def test_kana_to_hepburn():
    """Check two hiragana words against their expected romanization."""
    for kana, romaji in (('まみさん', 'mamisan'), ('はっとり', 'hattori')):
        assert_equal(jaconv.kana2alphabet(kana), romaji)
Exemple #7
0
 def _transliterate(token: str) -> str:
     """Return ``token`` passed through ``jaconv.kata2hira`` and then
     ``jaconv.kana2alphabet``."""
     return jaconv.kana2alphabet(jaconv.kata2hira(token))
def read_aozora_bunko_list(path: str, ndc_tr: Dict[str, str]) -> defaultdict:
    '''
    Reads in the list_person_all_extended_utf8.csv of Aozora Bunko and
    constructs a nested dictionary keyed on author and title. This is
    then used identify the correct path to the file as well as give
    more metadata.

    Args:
        path: path to the zip archive containing
            list_person_all_extended_utf8.csv.
        ndc_tr: mapping from NDC classification code to its label.

    Returns:
        defaultdict mapping author name (Japanese) to a dict of
        title -> metadata dict with the keys author_ja, author,
        author_year, title_ja, title, year, ndc, file_path, file_name.

    Raises:
        KeyError: if a row carries an NDC code that is missing from
            ndc_tr, or the CSV lacks one of the expected columns.
    '''
    d = defaultdict(dict)
    url_rx = re.compile(r'https://www\.aozora\.gr\.jp/cards/(\d+)/(.+)')
    # Compiled once up front instead of once per CSV row.
    year_rx = re.compile(r'(\d{4})(.+)年\s?(\d{1,2})月((\d{1,2})日)?')
    death_year_rx = re.compile(r'\d{4}')
    with ZipFile(path) as z:
        with z.open('list_person_all_extended_utf8.csv', 'r') as f:
            # Decode explicitly as UTF-8 rather than relying on the
            # platform's default encoding.
            for row in csv.DictReader(TextIOWrapper(f, encoding='utf-8')):
                # Some works have versions in both new- and old-style
                # kana. Only rows marked as new-style (新字新仮名) are
                # processed; every other row is skipped.
                if row['文字遣い種別'] != '新字新仮名':
                    log.warning(f'Skipping processing of old-syle kana work: {row}')
                    continue

                # Use the lower value from 底本初版発行年1 and 初出:
                year = ''

                year_matches = year_rx.match(row['底本初版発行年1'])
                if year_matches:
                    year = year_matches.group(1)

                year_alternate_matches = year_rx.search(row['初出'])
                if year_alternate_matches:
                    alt_year = year_alternate_matches.group(1)
                    if year == '' or int(alt_year) < int(year):
                        year = alt_year

                # Sanity check for year: a work cannot postdate its author's
                # death. The pattern has no capture groups, so the year is
                # read with group(0); the comparison only makes sense when a
                # year was found above.
                death_match = death_year_rx.search(row['没年月日'])
                if death_match and year:
                    death_year = death_match.group(0)
                    if int(death_year) < int(year):
                        year = '<' + death_year  # Specify upper bound as last resort.

                author_ja = row['姓'] + row['名']
                author_en = row['名ローマ字'] + ' ' + row['姓ローマ字']
                title = row['作品名']
                title_ja = title
                title_en = jaconv.kana2alphabet(jaconv.kata2hira(row['作品名読み'])).title()
                subtitle = row['副題']
                if subtitle != '':
                    title_ja += ': ' + subtitle
                    title_en += ': ' + romanize(row['副題読み']).title()

                # Rows without a recognisable XHTML/HTML URL cannot be
                # mapped to a file and are skipped.
                match = url_rx.match(row['XHTML/HTMLファイルURL'])
                if match is None:
                    log.debug('Missing XHTML/HTML file for record {}, skipping...'.format(row))
                    continue
                card_id, file_path = match.groups()

                # Strip the 'NDC ' prefix and the 'K' marker before lookup;
                # the marker is re-added as a suffix below.
                ndc = row['分類番号'].replace('NDC ', '').replace('K', '')

                if len(ndc) > 3:
                    # Several whitespace-separated codes: translate each.
                    ndc = '/'.join(ndc_tr[n] for n in ndc.split())
                elif ndc:
                    ndc = ndc_tr[ndc]

                if 'K' in row['分類番号']:
                    ndc += ' (児童書)'

                works = d[author_ja]
                if title in works:
                    # Remove translations: a title seen again for the same
                    # author drops the previously stored entry.
                    works.pop(title, None)
                    if not works:
                        d.pop(author_ja, None)
                else:
                    works[title] = {
                        'author_ja': author_ja,
                        'author': author_en,
                        'author_year': f'{row["生年月日"]}--{row["没年月日"]}',
                        'title_ja': title_ja,
                        'title': title_en,
                        'year': year,
                        'ndc': ndc,
                        'file_path': 'aozorabunko/cards/{}/{}'.format(card_id, file_path),
                        'file_name': '{}_{}_{}'.format(  # TODO Do we need to shorthen these?
                            row['姓ローマ字'],
                            row['名ローマ字'][0:1],
                            romanize(row['作品名読み'][0:7]).title()
                        )
                    }
    return d
def romanize(s: str) -> str:
    '''
    Romanizes s and reduces it to ASCII letters and underscores.

    The string goes through jaconv.kata2hira and jaconv.kana2alphabet;
    every character that is not an ASCII letter then becomes '_', and runs
    of underscores are collapsed to a single one.
    '''
    # NOTE(review): this replacement runs before kata2hira and only targets
    # hiragana 'ゔ'; katakana 'ヴ' is not touched by it -- confirm intent.
    prepared = s.replace('ゔ', 'v')
    hiragana = jaconv.kata2hira(prepared)
    latin = jaconv.kana2alphabet(hiragana)
    underscored = re.sub(r'[^a-zA-Z]', '_', latin)
    return re.sub(r'_+', '_', underscored)
def test_kana_to_hepburn():
    """Verify that ``jaconv.kana2alphabet`` romanizes two sample words."""
    inputs = ['まみさん', 'はっとり']
    expected = ['mamisan', 'hattori']
    for kana, romaji in zip(inputs, expected):
        assert_equal(jaconv.kana2alphabet(kana), romaji)