예제 #1
0
def pinyinify():
    """Annotate the heading and paragraphs of a posted HTML document with pinyin.

    For a POST request, the ``data`` form field is parsed as HTML. Every
    character of the first ``<h1>`` and of each ``<p>`` is paired with
    ``hanzi.to_pinyin(character)``.

    Returns:
        POST: ``jsonify(title=..., paragraphs=...)`` where ``title`` is a list
        of ``(character, pinyin)`` pairs and ``paragraphs`` holds one such
        list per ``<p>`` element. A document without an ``<h1>`` produces an
        empty ``title`` list instead of failing with ``AttributeError``.
        Any other method: the plain string ``"Error wrong method"``.
    """
    if request.method != 'POST':
        return "Error wrong method"

    # Single-argument print(...) produces the same output whether it is parsed
    # as a Python 2 print statement or as a Python 3 function call.
    print('POST')

    # Prepending u'' coerces the payload to a unicode string on Python 2.
    html_doc = u'' + request.form['data']
    soup = BeautifulSoup(html_doc, 'html.parser')

    # soup.h1 is None when the document has no <h1>; treat that as "no title".
    heading = soup.h1
    if heading is None:
        title = u''
    else:
        title = u'' + heading.text
    print(title)

    titlePairs = [(ch, hanzi.to_pinyin(ch)) for ch in title]

    paragraphsPairs = []
    for p in soup.find_all('p'):
        print(p)
        paragraphsPairs.append([(ch, hanzi.to_pinyin(ch)) for ch in p.text])

    return jsonify(title=titlePairs, paragraphs=paragraphsPairs)
예제 #2
0
 def test_accented_pinyin(self):
     # Accented output is checked for the plain and the segmented fixture,
     # each with the default call and with all_readings=True.
     default_reading = hanzi.to_pinyin(self.chinese)
     self.assertEqual(default_reading, self.apinyin)

     every_reading = hanzi.to_pinyin(self.chinese, all_readings=True)
     self.assertEqual(every_reading, self.apinyin_readings)

     segmented_reading = hanzi.to_pinyin(self.chinese_segmented)
     self.assertEqual(segmented_reading, self.apinyin_segmented)

     segmented_every_reading = hanzi.to_pinyin(self.chinese_segmented,
                                               all_readings=True)
     self.assertEqual(segmented_every_reading,
                      self.apinyin_segmented_readings)
예제 #3
0
 def test_accented_pinyin(self):
     # Accented output is checked for the plain and the segmented fixture,
     # each with the default call and with all_readings=True.
     default_reading = hanzi.to_pinyin(self.chinese)
     self.assertEqual(default_reading, self.apinyin)

     every_reading = hanzi.to_pinyin(self.chinese, all_readings=True)
     self.assertEqual(every_reading, self.apinyin_readings)

     segmented_reading = hanzi.to_pinyin(self.chinese_segmented)
     self.assertEqual(segmented_reading, self.apinyin_segmented)

     segmented_every_reading = hanzi.to_pinyin(self.chinese_segmented,
                                               all_readings=True)
     self.assertEqual(segmented_every_reading,
                      self.apinyin_segmented_readings)
예제 #4
0
def get_pinyin(chinese_phrase):
    """Return the pinyin of a Chinese phrase as plain text and as HTML.

    The whole phrase is converted in one ``hanzi.to_pinyin`` call, then cut
    into syllables with the ``pinyin.syllable`` pattern
    (e.g. "niǔdài" -> ['niǔ', 'dài']). Each syllable is passed through
    ``add_pinyin_tone_html`` and the results are concatenated.

    Returns:
        A ``(pinyin_phrase, pinyin_html)`` tuple.
    """
    pinyin_phrase = hanzi.to_pinyin(chinese_phrase)
    syllables = re.findall(pinyin.syllable, pinyin_phrase)
    pinyin_html = "".join(map(add_pinyin_tone_html, syllables))
    return pinyin_phrase, pinyin_html
 def do_make(self, arg):
     'make a custom flashcard: make 什么什么'
     # NOTE(review): the one-line docstring above is presumably shown to the
     # user as help text, so it is kept verbatim - confirm against the class.
     contains_chinese = hanzi.has_chinese(arg)
     if not contains_chinese:
         print('input is not well formed chinese characters')
         return  # leave the current state untouched

     # Derive every script/romanization variant from the same input, then ask
     # the user for the English side of the card.
     simplified_form = t2s.convert(arg)
     traditional_form = s2t.convert(arg)
     pinyin_reading = hanzi.to_pinyin(arg)
     zhuyin_reading = hanzi.to_zhuyin(arg)
     definition = input('english definition: ')

     card = ChineseRecognitionOnly(simplified_form, traditional_form,
                                   pinyin_reading, zhuyin_reading,
                                   definition, '', '', '', self.tags)
     self.save_to_file(card)
     self.transition_to(State.BEGIN)
예제 #6
0
 def test_issue_10(self):
     """Incorrect readings for 女."""
     # Regression check: the reading of 女 must use the ü vowel.
     expected = 'nǚ'
     self.assertEqual(expected, hanzi.to_pinyin('女'))
예제 #7
0
 def test_issue_7(self):
     # 手 and 收 are expected to differ only in tone (shǒu vs. shōu); each
     # character must come back with its own reading.
     third_tone = hanzi.to_pinyin('手')
     self.assertEqual('shǒu', third_tone)

     first_tone = hanzi.to_pinyin('收')
     self.assertEqual('shōu', first_tone)
예제 #8
0
 def test_custom_container(self):
     # The expected value is the all-readings fixture with its square
     # brackets swapped for the parentheses requested via container='()'.
     expected = self.apinyin_readings.replace('[', '(')
     expected = expected.replace(']', ')')

     actual = hanzi.to_pinyin(self.chinese, all_readings=True,
                              container='()')
     self.assertEqual(actual, expected)
예제 #9
0
 def test_word_readings(self):
     # 便宜 is looked up as a whole word: one reading by default, both
     # readings inside a container when all_readings=True.
     word = '便宜'

     single_reading = hanzi.to_pinyin(word)
     self.assertEqual(single_reading, 'piànyi')

     every_reading = hanzi.to_pinyin(word, all_readings=True)
     self.assertEqual(every_reading, '[piànyi/biànyí]')
예제 #10
0
 def test_custom_container(self):
     # The expected value is the all-readings fixture with its square
     # brackets swapped for the parentheses requested via container='()'.
     expected = self.apinyin_readings.replace('[', '(')
     expected = expected.replace(']', ')')

     actual = hanzi.to_pinyin(self.chinese, all_readings=True,
                              container='()')
     self.assertEqual(actual, expected)
예제 #11
0
 def test_word_readings(self):
     # 便宜 is looked up as a whole word: one reading by default, both
     # readings inside a container when all_readings=True.
     word = '便宜'

     single_reading = hanzi.to_pinyin(word)
     self.assertEqual(single_reading, 'piànyi')

     every_reading = hanzi.to_pinyin(word, all_readings=True)
     self.assertEqual(every_reading, '[piànyi/biànyí]')
예제 #12
0
    locale: Locale

    def romanize(self, locale: Locale) -> Markup:
        """Romanize via the parent class, mirroring the loan word's capital.

        The parent's result is returned unchanged unless the first character
        of ``self.loan`` is upper case; in that case the result is
        capitalized and wrapped in ``Markup``.
        """
        romanized = super().romanize(locale)
        if not self.loan[0].isupper():
            return romanized
        return Markup(romanized.capitalize())


hangul_romanize_transliter = Transliter(academic)


# NOTE(review): every helper below wraps its result in Markup without
# escaping the input first; this presumably assumes trusted terms - confirm.

def _romanize_ja(t: str) -> Markup:
    """Strip spaces from *t* and convert it with ``to_roma``."""
    return Markup(to_roma(t.replace(' ', '')))


def _romanize_ko(t: str) -> Markup:
    """Strip spaces from *t* and transliterate it with the hangul transliter."""
    compact = t.replace(' ', '')
    return Markup(hangul_romanize_transliter.translit(compact))


def _romanize_zh_cn(t: str) -> Markup:
    """Convert *t* with ``to_pinyin`` and drop spaces from the result."""
    return Markup(to_pinyin(t).replace(' ', ''))


def _romanize_zh_hk(t: str) -> Markup:
    """Render *t* as jyutping with tone digits in ``<sup>`` tags.

    Input consisting only of ASCII letters, digits and spaces is used as is;
    anything else goes through ``pinyin_jyutping_sentence.jyutping`` first.
    Each digit, together with one optional following space, is then replaced
    by ``<sup>digit</sup>``.
    """
    if re.match(r'^[A-Za-z0-9 ]+$', t):
        jyutping = t
    else:
        jyutping = pinyin_jyutping_sentence.jyutping(t, True, True)
    return Markup(re.sub(r'(\d) ?', r'<sup>\1</sup>', jyutping))


def _romanize_zh_tw(t: str) -> Markup:
    """Convert *t* to zhuyin, then to pinyin, and drop spaces from the result."""
    return Markup(zhuyin_to_pinyin(to_zhuyin(t)).replace(' ', ''))


# Romanization function for each supported locale.
romanizers: Mapping[Locale, Callable[[str], Markup]] = {
    Locale.parse('ja'): _romanize_ja,
    Locale.parse('ko'): _romanize_ko,
    Locale.parse('zh_CN'): _romanize_zh_cn,
    Locale.parse('zh_HK'): _romanize_zh_hk,
    Locale.parse('zh_TW'): _romanize_zh_tw,
}


def romanize(term: str, locale: Locale) -> Markup:
    global romanizers
    try:
        f = romanizers[locale]
    except KeyError:
예제 #13
0
 def test_issue_10(self):
     """Incorrect readings for 女."""
     # Regression check: the reading of 女 must use the ü vowel.
     expected = 'nǚ'
     self.assertEqual(expected, hanzi.to_pinyin('女'))
예제 #14
0
 def test_issue_7(self):
     # 手 and 收 are expected to differ only in tone (shǒu vs. shōu); each
     # character must come back with its own reading.
     third_tone = hanzi.to_pinyin('手')
     self.assertEqual('shǒu', third_tone)

     first_tone = hanzi.to_pinyin('收')
     self.assertEqual('shōu', first_tone)
예제 #15
0
# Load the ``words`` table, derive per-character helper columns and push the
# characters and the two-character words into the graph.
g = NeoGraph()
db = Db()
eng = db.create_engine()
df = pd.read_sql("""select * from words""", eng)


def _second_char(word):
    """Return the second character of *word*, or None if it is shorter."""
    if len(word) >= 2:
        return word[1]
    return None


def _second_token(reading):
    """Return the second space-separated token, or None without a space."""
    if ' ' not in reading:
        return None
    return reading.split(' ')[1]


# Word length plus its first two characters.
df['len'] = df['characters'].apply(len)
df['c1'] = df['characters'].apply(lambda word: word[0])
df['c2'] = df['characters'].apply(_second_char)

# First two tokens of the stored (space-separated) pinyin column.
df['p1'] = df['pinyin'].apply(lambda reading: reading.split(' ')[0])
df['p2'] = df['pinyin'].apply(_second_token)

# Backslash-escape single quotes in the English text.
df['english'] = df['english'].apply(lambda text: text.replace("'", "\\'"))
# df['english_short'] = df['english'].apply(lambda x: x.split(';')[0])
# df['descr'] = df.apply(lambda x: f"""{x['pinyin']} | {x['english_short']}""",axis = 1)

# Keep the stored pinyin under a new name before overwriting the column with
# the hanzi.to_pinyin() rendering of the characters.
df['pinyin_num'] = df['pinyin'].copy()
df['pinyin'] = df['characters'].apply(hanzi.to_pinyin)

# Pair each character with the matching space-separated pinyin token.
# NOTE(review): zip() stops at the shorter sequence; if hanzi.to_pinyin()
# returns a reading without spaces, only the first character of every word
# is collected here - confirm that this is intended.
pairs = [
    (char, reading)
    for _, row in df.iterrows()
    for char, reading in zip(row['characters'], row['pinyin'].split(' '))
]
helper = [f'{char} : {reading}' for char, reading in pairs]
chars = [char for char, _ in pairs]

# g.truncate() presumably clears the existing graph before it is refilled;
# set the flag to False to skip that step.
truncate = True
if truncate:
    g.truncate()

unique_chars = list(set(chars))
g.add_characters(unique_chars, df)

two_char_words = df[df['len'] == 2]
g.create_links(two_char_words)
예제 #16
0
def to_pinyin(sent):
    """Return ``hanzi.to_pinyin(sent)`` computed with ``accented=False``."""
    unaccented = hanzi.to_pinyin(sent, accented=False)
    return unaccented