def pinyinify():
    if request.method == 'POST':
        print('POST')
        # print(request.data)
        data = request.form
        titlePairs = []
        paragraphsPairs = []
        html_doc = data['data']
        html_doc = u'' + html_doc
        soup = BeautifulSoup(html_doc, 'html.parser')
        print(soup.h1.text)
        # print(soup.get_text())
        title = u'' + soup.h1.text
        paragraphs = soup.find_all('p')
        # Pair every character in the title with its pinyin reading.
        for ch in title:
            titlePairs.append((ch, hanzi.to_pinyin(ch)))
        # Do the same for each <p> element, one list of pairs per paragraph.
        for p in paragraphs:
            paraPairs = []
            print(p)
            for ch in p.text:
                paraPairs.append((ch, hanzi.to_pinyin(ch)))
            paragraphsPairs.append(paraPairs)
        return jsonify(title=titlePairs, paragraphs=paragraphsPairs)
    else:
        return "Error: wrong method"

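For reference, a minimal client-side sketch of how this view could be exercised, assuming the function is registered at a hypothetical /pinyinify route on a local Flask dev server; the 'data' field name matches the request.form['data'] lookup above, and the readings in the comments are illustrative, not verified output.

import requests

# Hypothetical URL; the snippet above shows only the view body, not the
# route it is registered under.
resp = requests.post(
    'http://localhost:5000/pinyinify',
    data={'data': '<h1>你好</h1><p>世界</p>'},
)
result = resp.json()
print(result['title'])       # e.g. [['你', 'nǐ'], ['好', 'hǎo']]
print(result['paragraphs'])  # one list of [character, pinyin] pairs per <p>
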
def test_accented_pinyin(self):
    self.assertEqual(hanzi.to_pinyin(self.chinese), self.apinyin)
    self.assertEqual(hanzi.to_pinyin(self.chinese, all_readings=True),
                     self.apinyin_readings)
    self.assertEqual(hanzi.to_pinyin(self.chinese_segmented),
                     self.apinyin_segmented)
    self.assertEqual(hanzi.to_pinyin(self.chinese_segmented, all_readings=True),
                     self.apinyin_segmented_readings)

def get_pinyin(chinese_phrase):
    # First, query pinyin for the complete phrase.
    pinyin_phrase = hanzi.to_pinyin(chinese_phrase)
    # Split by syllable and add the relevant HTML container elements,
    # e.g. "niǔdài" -> ['niǔ', 'dài']
    pinyin_parts = re.findall(pinyin.syllable, pinyin_phrase)
    pinyin_html = "".join([add_pinyin_tone_html(x) for x in pinyin_parts])
    return (pinyin_phrase, pinyin_html)

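The findall step above relies on a ready-made pinyin-syllable regex. Below is a small, self-contained sketch of that split, assuming `pinyin` here refers to the zhon library's `zhon.pinyin` module, with a placeholder standing in for the repo-specific add_pinyin_tone_html helper.

import re
from zhon import pinyin  # assumption: the `pinyin` module used in get_pinyin

phrase = 'niǔdài'
# zhon.pinyin.syllable matches one pinyin syllable at a time.
parts = re.findall(pinyin.syllable, phrase)
print(parts)  # ['niǔ', 'dài']

# Placeholder for add_pinyin_tone_html: wrap each syllable so it can be styled.
html = ''.join(f'<span class="syllable">{p}</span>' for p in parts)
print(html)
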
def do_make(self, arg):
    'make a custom flashcard: make 什么什么'
    if not hanzi.has_chinese(arg):
        print('input is not well-formed Chinese characters')
        return  # don't transition state
    simplified = t2s.convert(arg)
    traditional = s2t.convert(arg)
    pinyin = hanzi.to_pinyin(arg)
    zhuyin = hanzi.to_zhuyin(arg)
    english = input('english definition: ')
    self.save_to_file(
        ChineseRecognitionOnly(simplified, traditional, pinyin, zhuyin,
                               english, '', '', '', self.tags))
    self.transition_to(State.BEGIN)

def test_issue_10(self):
    """Incorrect readings for 女."""
    reading = hanzi.to_pinyin('女')
    self.assertEqual('nǚ', reading)

def test_issue_7(self):
    reading = hanzi.to_pinyin('手')
    self.assertEqual('shǒu', reading)
    reading = hanzi.to_pinyin('收')
    self.assertEqual('shōu', reading)

def test_custom_container(self):
    apinyin = self.apinyin_readings.replace('[', '(').replace(']', ')')
    self.assertEqual(
        hanzi.to_pinyin(self.chinese, all_readings=True, container='()'),
        apinyin)

def test_word_readings(self):
    self.assertEqual(hanzi.to_pinyin('便宜'), 'piànyi')
    self.assertEqual(hanzi.to_pinyin('便宜', all_readings=True),
                     '[piànyi/biànyí]')

    locale: Locale

    def romanize(self, locale: Locale) -> Markup:
        r = super().romanize(locale)
        return Markup(r.capitalize()) if self.loan[0].isupper() else r


hangul_romanize_transliter = Transliter(academic)

romanizers: Mapping[Locale, Callable[[str], Markup]] = {
    Locale.parse('ja'): lambda t: Markup(to_roma(t.replace(' ', ''))),
    Locale.parse('ko'): lambda t: Markup(
        hangul_romanize_transliter.translit(t.replace(' ', ''))),
    Locale.parse('zh_CN'): lambda t: Markup(to_pinyin(t).replace(' ', '')),
    Locale.parse('zh_HK'): lambda t: Markup(
        re.sub(
            r'(\d) ?', r'<sup>\1</sup>',
            t if re.match(r'^[A-Za-z0-9 ]+$', t)
            else pinyin_jyutping_sentence.jyutping(t, True, True))),
    Locale.parse('zh_TW'): lambda t: Markup(
        zhuyin_to_pinyin(to_zhuyin(t)).replace(' ', '')),
}


def romanize(term: str, locale: Locale) -> Markup:
    global romanizers
    try:
        f = romanizers[locale]
    except KeyError:

g = NeoGraph()
db = Db()
eng = db.create_engine()

df = pd.read_sql("""select * from words""", eng)
df['len'] = df['characters'].apply(lambda x: len(list(x)))
df['c1'] = df['characters'].apply(lambda x: list(x)[0])
df['c2'] = df['characters'].apply(lambda x: list(x)[1] if len(x) >= 2 else None)
df['p1'] = df['pinyin'].apply(lambda x: x.split(' ')[0])
df['p2'] = df['pinyin'].apply(lambda x: x.split(' ')[1] if ' ' in x else None)
df['english'] = df['english'].apply(lambda x: x.replace("'", "\\'"))
# df['english_short'] = df['english'].apply(lambda x: x.split(';')[0])
# df['descr'] = df.apply(lambda x: f"""{x['pinyin']} | {x['english_short']}""", axis=1)
df['pinyin_num'] = df['pinyin'].copy()
df['pinyin'] = df['characters'].apply(lambda x: hanzi.to_pinyin(x))

helper = []
chars = []
for i, c in df.iterrows():
    for h, p in zip(list(c['characters']), c['pinyin'].split(' ')):
        helper.append(f'{h} : {p}')
        chars.append(h)

truncate = True
if truncate:
    g.truncate()

g.add_characters(list(set(chars)), df)
g.create_links(df[df['len'] == 2])

def to_pinyin(sent):
    return hanzi.to_pinyin(sent, accented=False)
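A short usage note: with accented=False, dragonmapper returns numbered pinyin instead of tone diacritics, so the wrapper above trades 'nǐhǎo'-style output for 'ni3hao3'-style output. The values in the comments below are a sketch of the expected results, not verified output.

from dragonmapper import hanzi  # the module used throughout these snippets

print(hanzi.to_pinyin('你好'))                  # accented, e.g. 'nǐhǎo'
print(hanzi.to_pinyin('你好', accented=False))  # numbered, e.g. 'ni3hao3'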