Example #1
0
def init():
    """Build every two-part phrase obtainable from the dictionary's pinyin.

    Reads ``words.dic`` (UTF-8, one word per line), converts every non-empty
    stripped line with ``pinyin.get_pinyin`` and concatenates each ordered
    pair of the distinct results (a result is also paired with itself).

    Returns:
        list: The unique concatenations, in arbitrary (set iteration) order.
    """
    # The context manager closes the file even when reading or decoding fails.
    with codecs.open('words.dic', encoding='utf-8') as f:
        words = [line.strip() for line in f.read().splitlines()]
    pinyins = {pinyin.get_pinyin(word) for word in words if word}
    # Building the set directly avoids materialising the full n*n list first.
    return list({first + second for first in pinyins for second in pinyins})
Example #2
0
def update_concept_pinyin():
    """Recompute and store the lower-cased pinyin of every row of ``concept``.

    For each row returned by the SELECT, the first column is used as the id
    and the second as the name to convert.
    """
    from pinyin import get_pinyin

    database = Database()
    rows = database.query_db("SELECT * FROM concept")
    # NOTE(review): the UPDATE is assembled with string formatting, so a
    # converted name containing a double quote would break the statement.
    # Switch to bound parameters if ``query_db`` supports them -- confirm
    # against the Database class.
    update_template = 'UPDATE concept SET pinyin = "%s" WHERE id = %s'
    for row in rows:
        row_id, name = row[0], row[1]
        database.query_db(update_template % (get_pinyin(name).lower(), row_id))
Example #3
0
def lookuporinsert_tag(tag_name):
    """Return the tag for ``tag_name``, creating and saving one if the lookup is falsy."""
    existing = lookup_tag(tag_name)
    if existing:
        return existing

    # Nothing usable was found: initialise a fresh tag with zeroed counters
    # and its pinyin form, then save it.
    created = Tag()
    created.tag_name = tag_name
    created.tag_bool_deleted = False
    created.pinyin = get_pinyin(tag_name)
    created.tag_popularity = 0
    created.tag_last_update = 0
    created.save()
    return created
Example #4
0
    def sample(self, sentence, ignore_unk=False, beamwidth=10):
        """Translate ``sentence`` and report the source words left unknown.

        The search model is applied to the sentence's index sequence and the
        hypothesis with the lowest score is chosen (scores are divided by the
        hypothesis lengths first when ``self.normalize`` is set). When both
        ``self.replace_unk`` and ``self.with_attention`` are set, every
        ``self.unk_token`` in the decoded output is replaced through its
        hard-aligned source word: by the ``self.unk_dict`` entry when there is
        one, otherwise by ``get_pinyin`` of that word.

        Args:
            sentence: Source sentence; it is split on whitespace.
            ignore_unk: Not used by this method.
            beamwidth: Not used by this method.

        Returns:
            tuple: ``(res, unknown_words)``. ``res`` is the translation (the
            stdout of the ``self.detokenizer_cmd`` process when that command
            is set). ``unknown_words`` lists the source words whose index is
            ``self.unk_id`` and that were not replaced from ``self.unk_dict``.
        """
        ids = self._word_to_idx(sentence, self.dict_src)

        results = self.search_model.apply(numpy.array([ids]).T)
        outputs, scores = results[:2]
        if self.with_attention:
            alignments = results[2]

        if self.normalize:
            lengths = numpy.array([len(s) for s in outputs])
            scores = scores / lengths
        sidx = numpy.argmin(scores)
        # The last index of the chosen hypothesis is dropped before decoding
        # (presumably the end-of-sentence marker -- confirm).
        res = self._idx_to_word(outputs[sidx][:-1], self.idict_trg)

        translated_unks = set()

        if self.replace_unk and self.with_attention:
            source_words = sentence.split() + [self.eos_token]
            tran_words = res.split()
            alignment = numpy.array(alignments[sidx]).transpose()
            # get the hard alignment
            aligned_source_words = [
                source_words[idx] for idx in numpy.argmax(alignment, axis=0)
            ]
            new_tran_words = []
            for i, tran_word in enumerate(tran_words):
                if tran_word != self.unk_token:
                    new_tran_words.append(tran_word)
                    continue
                # replace unk token
                aligned_source_word = aligned_source_words[i]
                if aligned_source_word in self.unk_dict:
                    new_tran_words.append(self.unk_dict[aligned_source_word])
                    translated_unks.add(aligned_source_word)
                else:
                    # Only fall back to get_pinyin when the dictionary has no
                    # entry: it only accepts Chinese words in GBK encoding, so
                    # it must not be called needlessly for words already
                    # covered by unk_dict.
                    new_tran_words.append(get_pinyin(aligned_source_word))

            logger.info('new_tran_words:%s', new_tran_words)
            res = " ".join(new_tran_words)

        if self.detokenizer_cmd:
            detokenizer = Popen(self.detokenizer_cmd, stdin=PIPE, stdout=PIPE)
            res, _ = detokenizer.communicate(res)

        unknown_words = [
            word for word, index in zip(sentence.split(), ids)
            if index == self.unk_id and word not in translated_unks
        ]

        return res, unknown_words
Example #5
0
    def replace_unk(self, source_words, output, alignment):
        """Decode ``output`` and substitute every unknown token using ``alignment``.

        Args:
            source_words: Source tokens, indexed by the positions obtained
                from ``alignment``.
            output: Target index sequence handed to ``self._idx_to_word``.
            alignment: Array whose argmax along ``axis=0`` gives, for each
                output position, the index of the aligned source word.

        Returns:
            str: The output tokens joined by single spaces, with each
            ``self.unk_token`` replaced by the ``self.unk_dict`` entry of its
            aligned source word, or by ``get_pinyin`` of that word when the
            dictionary has no entry.
        """
        # NOTE(review): the decoded result is walked position by position, so
        # this assumes ``_idx_to_word`` returns a sequence of tokens rather
        # than one joined string -- confirm.
        tran_words = self._idx_to_word(output, self.idict_trg)
        aligned_source_words = [source_words[idx] for idx in numpy.argmax(alignment, axis=0)]
        new_tran_words = []
        for i, tran_word in enumerate(tran_words):
            if tran_word != self.unk_token:
                new_tran_words.append(tran_word)
                continue
            # replace unk token
            aligned_source_word = aligned_source_words[i]
            if aligned_source_word in self.unk_dict:
                new_tran_words.append(self.unk_dict[aligned_source_word])
            else:
                # Only fall back to get_pinyin when the dictionary has no
                # entry: it only accepts Chinese words in GBK encoding, so it
                # must not be called needlessly for words already covered.
                new_tran_words.append(get_pinyin(aligned_source_word))

        return " ".join(new_tran_words)
Example #6
0
# CSV file names (unicode literals) whose pinyin transliteration is printed by
# the loop that follows this list.
table_name = [
    u'基础表_工伤个人待遇支付明细.csv', u'基础表_工伤亡职工变更信息.csv', u'基础表_工伤保险个人参保信息.csv',
    u'基础表_工伤保险个人应缴实缴明细信息.csv', u'基础表_工伤保险个人缴费基数信息.csv', u'基础表_工伤保险个人补退信息.csv',
    u'基础表_工伤保险单位参保信息.csv', u'基础表_工伤保险单位应缴信息.csv', u'基础表_工伤保险单位欠费明细信息.csv',
    u'基础表_工伤保险单位缴费待转基金信息.csv', u'基础表_工伤保险单位补退信息.csv', u'基础表_工伤保险参保个人基本信息.csv',
    u'基础表_工伤保险参保单位基本信息.csv', u'基础表_工伤保险在职人员变更信息.csv', u'基础表_工伤保险征集通知明细信息.csv',
    u'基础表_工伤保险待遇支付信息.csv', u'基础表_工伤保险综合参数表.csv', u'基础表_工伤保险缴费比例信息.csv',
    u'基础表_工伤保险职工平均工资参数表.csv', u'基础表_工伤劳动能力鉴定信息.csv', u'基础表_工伤定期待遇参数.csv',
    u'基础表_工伤职工工伤亡信息.csv', u'基础表_工伤非定期待遇参数.csv', u'工伤个人待遇支付明细.csv',
    u'工伤亡职工变更信息.csv', u'工伤供养亲属变更信息.csv', u'工伤供养亲属基本信息.csv',
    u'工伤供养亲属待遇审批信息.csv', u'工伤保险个人参保信息.csv', u'工伤保险个人应缴实缴明细信息.csv',
    u'工伤保险个人待转基金信息.csv', u'工伤保险个人缴费到账信息.csv', u'工伤保险个人缴费到账明细信息.csv',
    u'工伤保险个人缴费基数信息.csv', u'工伤保险个人补退信息.csv', u'工伤保险人员转移信息.csv',
    u'工伤保险代扣代缴明细信息.csv', u'工伤保险单位参保信息.csv', u'工伤保险单位变更信息.csv',
    u'工伤保险单位变更登记信息.csv', u'工伤保险单位实缴信息.csv', u'工伤保险单位应缴信息.csv',
    u'工伤保险单位欠费明细信息.csv', u'工伤保险单位缴费到账信息.csv', u'工伤保险单位缴费到账明细信息.csv',
    u'工伤保险单位缴费待转基金信息.csv', u'工伤保险单位缴费申报.csv', u'工伤保险单位补退信息.csv',
    u'工伤保险参保个人基本信息.csv', u'工伤保险参保单位基本信息.csv', u'工伤保险在职人员变更信息.csv',
    u'工伤保险征集通知明细信息.csv', u'工伤保险待遇支付信息.csv', u'工伤保险待遇类别与支付项目对照.csv',
    u'工伤保险欠款核销信息.csv', u'工伤保险经办机构.csv', u'工伤保险综合参数表.csv', u'工伤保险缴费比例信息.csv',
    u'工伤保险职工平均工资参数表.csv', u'工伤劳动能力鉴定信息.csv', u'工伤单位实付信息.csv', u'工伤单位应付信息.csv',
    u'工伤定期待遇参数.csv', u'工伤定期待遇审批信息.csv', u'工伤职工工伤亡信息.csv', u'工伤补发退发信息.csv',
    u'工伤非定期待遇参数.csv', u'工伤非定期待遇审批信息.csv'
]

# Print the pinyin transliteration of every table file name.
for name in table_name:
    # The single-argument call form prints the same text under Python 2 and,
    # unlike the print statement, is also valid syntax under Python 3.
    print(pinyin.get_pinyin(name))
Example #7
0
#!c:\python27\python
# -*- coding: UTF-8 -*-

# 引入 CGI 处理模块
import cgi, cgitb

import pinyin

# 创建 FieldStorage的实例
form = cgi.FieldStorage()

# 接收字段数据
if form.getvalue('textcontent'):
    text_content = pinyin.get_pinyin(form.getvalue('textcontent'))
else:
    text_content = "没有内容"

print "Content-type:text/html"
print
print "<html>"
print "<head>"
print "<meta charset=\"utf-8\">"
print "<title>菜鸟教程 CGI 测试实例</title>"
print "</head>"
print "<body>"
print "<h2> 输入的内容是:%s</h2>" % text_content
print "</body>"
print "</html>"
Example #8
0
u'工伤保险单位缴费到账明细信息.csv',
u'工伤保险单位缴费待转基金信息.csv',
u'工伤保险单位缴费申报.csv',
u'工伤保险单位补退信息.csv',
u'工伤保险参保个人基本信息.csv',
u'工伤保险参保单位基本信息.csv',
u'工伤保险在职人员变更信息.csv',
u'工伤保险征集通知明细信息.csv',
u'工伤保险待遇支付信息.csv',
u'工伤保险待遇类别与支付项目对照.csv',
u'工伤保险欠款核销信息.csv',
u'工伤保险经办机构.csv',
u'工伤保险综合参数表.csv',
u'工伤保险缴费比例信息.csv',
u'工伤保险职工平均工资参数表.csv',
u'工伤劳动能力鉴定信息.csv',
u'工伤单位实付信息.csv',
u'工伤单位应付信息.csv',
u'工伤定期待遇参数.csv',
u'工伤定期待遇审批信息.csv',
u'工伤职工工伤亡信息.csv',
u'工伤补发退发信息.csv',
u'工伤非定期待遇参数.csv',
u'工伤非定期待遇审批信息.csv'
]

# Print the pinyin transliteration of every table file name.
for name in table_name:
    # The single-argument call form prints the same text under Python 2 and,
    # unlike the print statement, is also valid syntax under Python 3.
    print(pinyin.get_pinyin(name))
Example #9
0
        print(client_pinyins)
        for item in client_pinyins:
            client_pinyin = item[0].split('&')
            content = item[1]
            evaluators.append(Evaluator(client_pinyin, server_pinyin, content))
        for e in evaluators:
            print(e)
        evaluators.sort()

        result_content = evaluators[-1].content
        addToClipBoard(result_content)
        sg.PopupOK('成功输出到剪贴板')
    else:
        print('You entered ', values)
        character = values[0]
        pinyin_list = get_pinyin(character)
        pinyin = ''
        for pinyin_item in pinyin_list:
            pinyin += (pinyin_item[0] + '&')
        content = values[1]
        record = (character, pinyin, content)
        print(record)
        if event in ('Delete'):
            try:
                record_is_choose = values[2][0][0]
            except IndexError:
                continue
            print(record_is_choose)
            c.execute('DELETE FROM CORRES where CHARACTER = \'{}\''.format(
                record_is_choose))
        else:
Example #10
0
def dummy_test():
    """Smoke check: the two-argument ``get_pinyin`` call must return a truthy value."""
    # NOTE(review): 'ni3 hao3' is passed as the second argument rather than
    # compared with the result -- confirm that this is intended.
    outcome = get_pinyin('你好', 'ni3 hao3')
    assert outcome
Example #11
0
from pinyin import get_pinyin


def dummy_test():
    """Smoke check: the two-argument ``get_pinyin`` call must return a truthy value."""
    # NOTE(review): 'ni3 hao3' is passed as the second argument rather than
    # compared with the result -- confirm that this is intended.
    outcome = get_pinyin('你好', 'ni3 hao3')
    assert outcome


if __name__ == "__main__":
    # Demo run: print the conversion of a sample that mixes Chinese text with
    # punctuation marks.
    demo_text = "你好?中文!中文的,符号"
    print(get_pinyin(demo_text))