def process_zone_group(zone_name, fliggy_zones, city_name):
    """Match every '/'-separated part of ``zone_name`` to a Fliggy zone.

    A part matches a Fliggy zone when either pinyin slug contains the other.
    Several candidates for one part are narrowed down with
    ``calculate_similarity``.  Only when every part found a zone is one
    tab-separated line (city, '####'-joined zone ids, '####'-joined zone
    names, ratio) written to ``final_result_file_name``; otherwise nothing is
    written.
    """
    parts = zone_name.split('/')
    chosen_zones = []
    for part in parts:
        candidates = []
        for candidate in fliggy_zones:
            part_py = pypinyin.slug(unicode(part), separator='')
            candidate_py = pypinyin.slug(unicode(candidate), separator='')
            if part_py not in candidate_py and candidate_py not in part_py:
                continue
            candidates.append(candidate)
        if not candidates:
            # One unmatched part invalidates the whole group.
            break
        if len(candidates) == 1:
            chosen_zones.append(candidates[0])
        else:
            chosen_zones.append(calculate_similarity(part, candidates))

    if len(chosen_zones) != len(parts):
        return

    ratio = get_ratio(city_name, zone_name, haoqiao_setp1_file_name)
    zone_ids = [
        get_zid_by_cname_zname(city_name, zone, fliggy_setp1_file_name)
        for zone in chosen_zones
    ]
    fields = [city_name, '####'.join(zone_ids), '####'.join(chosen_zones),
              ratio]
    write2file(final_result_file_name, '\t'.join(fields) + '\n')
def process_single_zone(zone_name, fliggy_zones, city_name):
    """Match one Ctrip zone name against the Fliggy zones of a city.

    A Fliggy zone is a candidate when either pinyin slug contains the other.
    With at least one candidate, a result line (city, Fliggy zone id, zone
    name, ratio) is written to ``final_result_file_name``; several candidates
    are narrowed down with ``calculate_similarity`` first.  Without any
    candidate, the first one or two entries returned by
    ``get_zone_list_by_lcs`` are written to ``ctrip_setp2_file_name`` so they
    can be handled manually.
    """
    candidates = []
    for candidate in fliggy_zones:
        zone_py = pypinyin.slug(unicode(zone_name), separator='')
        candidate_py = pypinyin.slug(unicode(candidate), separator='')
        if zone_py not in candidate_py and candidate_py not in zone_py:
            continue
        candidates.append(candidate)

    if not candidates:
        # Nothing matched: record the longest-common-substring suggestions
        # for manual processing.
        ranked = get_zone_list_by_lcs(zone_name, fliggy_zones)
        if len(ranked) == 0:
            return
        fields = [zone_name]
        for position in range(min(len(ranked), 2)):
            fields.append(ranked[position][0])
            fields.append(str(ranked[position][1]))
        write2file(ctrip_setp2_file_name, '\t'.join(fields) + '\n')
        return

    if len(candidates) == 1:
        selected_zone = candidates[0]
    else:
        selected_zone = calculate_similarity(zone_name, candidates)
    ratio = get_ratio(city_name, zone_name, ctrip_setp1_file_name)
    fliggy_zone_id = get_zid_by_cname_zname(city_name, selected_zone,
                                            fliggy_setp1_file_name)
    fields = [city_name, fliggy_zone_id, selected_zone, ratio]
    write2file(final_result_file_name, '\t'.join(fields) + '\n')
def process_single_zone(zone_name, fliggy_zones, city_name):
    """Match one Haoqiao zone name against the Fliggy zones of a city.

    A Fliggy zone is a candidate when either pinyin slug contains the other.
    With at least one candidate, a result line (city, Fliggy zone id, zone
    name, ratio) is written to ``final_result_file_name``; several candidates
    are narrowed down with ``calculate_similarity`` first.  Without any
    candidate only a notice is printed.
    """
    match_list = []
    if fliggy_zones:
        # The slug of zone_name does not change per candidate: compute it once.
        haoqiao_zone_name_pinyin = pypinyin.slug(unicode(zone_name),
                                                 separator='')
        for fliggy_zone in fliggy_zones:
            fliggy_zone_name_pinyin = pypinyin.slug(unicode(fliggy_zone),
                                                    separator='')
            if (haoqiao_zone_name_pinyin in fliggy_zone_name_pinyin
                    or fliggy_zone_name_pinyin in haoqiao_zone_name_pinyin):
                match_list.append(fliggy_zone)

    if not match_list:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print('未匹配上')
        return

    if len(match_list) == 1:
        selected_zone = match_list[0]
    else:
        selected_zone = calculate_similarity(zone_name, match_list)
    ratio = get_ratio(city_name, zone_name, haoqiao_setp1_file_name)
    fliggy_zone_id = get_zid_by_cname_zname(city_name, selected_zone,
                                            fliggy_setp1_file_name)
    content = city_name + '\t' + fliggy_zone_id + '\t' + selected_zone + '\t' + ratio + '\n'
    write2file(final_result_file_name, content)
예제 #4
0
 def set2py(self, runType='once'):
     """Fill in the pinyin fields of sets that do not have them yet.

     Each batch asks ``self._db.getNonpySetList(10)`` for unprocessed sets
     and saves ``title_py`` (full pinyin), ``title_sp`` (first letters) and
     ``title_pyshow`` (flattened ``pinyin()`` output) for every set through
     ``self._db.saveSetPy``.

     :param runType: ``'once'`` handles a single batch; ``'loop'`` keeps
         fetching batches until none are left.

     Prints a completion message and calls ``exit()`` as soon as a batch
     comes back empty or falsy.
     """
     # Iterating (rather than recursing without forwarding runType) keeps
     # 'loop' mode running for every batch and avoids deep recursion.
     while True:
         vlist = self._db.getNonpySetList(10)
         if not vlist:
             print("所有剧集拼音全部处理完毕")
             exit()
         for data in vlist:
             title = data['title']
             newData = {
                 'title_py': slug(title, errors='ignore', separator=''),
                 'title_sp': slug(title,
                                  style=Style.FIRST_LETTER,
                                  errors='ignore',
                                  separator=''),
                 # Concatenate the per-character lists; the [] seed keeps an
                 # empty pinyin() result from raising TypeError.
                 'title_pyshow': reduce(lambda x, y: x + y, pinyin(title),
                                        []),
             }
             self._db.saveSetPy(newData, data['_id'])  # store the pinyin data
             print("{} 剧集拼音处理完毕".format(data['_id']))
         if runType != 'loop':
             break
예제 #5
0
def to_pinyin(hans, initials=False):
    '''Return ``hans`` as a pinyin string without separators.

    :param hans: text to convert.
    :param initials: when true, use ``Style.FIRST_LETTER`` instead of
        ``Style.NORMAL``.
    :return: the result of ``slug`` with ``errors='ignore'``.
    '''
    chosen_style = Style.FIRST_LETTER if initials else Style.NORMAL
    return slug(hans=hans, style=chosen_style, separator='', errors='ignore')
예제 #6
0
 def tid(self):
     """Return the stored id as a string, or one derived from title and author.

     When ``self._tid`` is ``None`` the id is the underscore-separated pinyin
     of ``self.title`` and of ``self.author``, joined by a single space.
     """
     if self._tid is None:
         title_part = pypinyin.slug(self.title, errors='ignore', separator='_')
         author_part = pypinyin.slug(self.author, errors='ignore', separator='_')
         return '{} {}'.format(title_part, author_part)
     return str(self._tid)
예제 #7
0
def tq_init(xcods, xinxs=['000001']):
    """Create a ``zsys.TQ_bar`` work object and load stock and index data.

    For every code in ``xcods`` the matching row of the stock name table and
    the code's CSV from ``zsys.rdatCN`` are stored in ``zsys.stkLibCodX`` and
    ``zsys.stkLib``; the same is done for every index in ``xinxs`` using
    ``zsys.rdatCNX``, ``zsys.stkInxLibCodX`` and ``zsys.stkInxLib``.  The
    first code and the first index become the working code/index of the
    returned object.

    :param xcods: sequence of stock codes (strings); must not be empty
        because ``xcods[0]`` is read.
    :param xinxs: sequence of index codes; must not be empty because
        ``xinxs[0]`` is read.  The default list is shared between calls but is
        not modified here.
    :return: the populated ``zsys.TQ_bar`` instance.
    """
    qx = zsys.TQ_bar()
    qx.CodPool = xcods
    #qx.codID,qx.codFN=xcod,zsys.rdatCN+xcod+'.csv'
    #
    # --- stocks: name table, per-code info row and per-code data ---
    #
    print('tq_init code...')
    #f_stkCodNamTbl='stk_code.csv'
    fss = zsys.rdatInx + zsys.f_stkCodNamTbl  #;print('f,',fss)
    # 'code' is read as str so that leading zeros in codes are preserved.
    zsys.stkCodNamTbl = pd.read_csv(fss, dtype={'code': str}, encoding='GBK')
    #
    for xcod in xcods:
        print('xcod:', xcod)
        # Row(s) of the name table belonging to this code.
        xd = zsys.stkCodNamTbl[zsys.stkCodNamTbl['code'] == xcod]
        css = xd['name']
        # NOTE(review): css is a pandas column selection, not a plain string;
        # confirm pypinyin.slug handles it as intended.
        ess = pypinyin.slug(css, style=pypinyin.FIRST_LETTER, separator='')
        # NOTE(review): xd comes from a boolean filter, so this assignment
        # may trigger pandas' SettingWithCopyWarning - confirm.
        xd['enam'] = ess
        zsys.stkLibCodX[xcod] = xd
        #
        # Per-code data file, indexed by its first column and sorted.
        fcod = zsys.rdatCN + xcod + '.csv'
        df = pd.read_csv(fcod, index_col=0)
        zsys.stkLib[xcod] = df.sort_index()
    #
    # The first code becomes the working code.
    xcod = xcods[0]
    qx.wrkCod = xcod
    qx.wrkCodDat = zsys.stkLib[xcod]
    qx.wrkCodInfo = zsys.stkLibCodX[xcod]
    #
    # --- indexes: same steps as for the stocks above ---
    print('tq_init inx...')
    #f_stkInxNamTbl='inx_code.csv'
    fss = zsys.rdatInx + zsys.f_stkInxNamTbl  #;print('f,',fss)
    zsys.stkInxNamTbl = pd.read_csv(fss, dtype={'code': str}, encoding='GBK')

    for xinx in xinxs:
        print('xinx:', xinx)
        xd = zsys.stkInxNamTbl[zsys.stkInxNamTbl['code'] == xinx]
        css = xd['name']
        ess = pypinyin.slug(css, style=pypinyin.FIRST_LETTER, separator='')
        xd['enam'] = ess
        zsys.stkInxLibCodX[xinx] = xd
        #
        fcod = zsys.rdatCNX + xinx + '.csv'
        df = pd.read_csv(fcod, index_col=0)
        zsys.stkInxLib[xinx] = df.sort_index()
    #
    # The first index becomes the working index.
    xinx = xinxs[0]
    qx.wrkInx = xinx
    qx.wrkInxDat = zsys.stkInxLib[xinx]
    qx.wrkInxInfo = zsys.stkInxLibCodX[xinx]
    #
    #
    #df=pd.read_csv(fdat)
    #df=df.sort_values('date')

    #
    return qx
def calculate_similarity(zone_name, fliggy_zones):
    """Return the Fliggy zone whose pinyin is most similar to ``zone_name``.

    Similarity is ``Levenshtein.ratio`` between the separator-less pinyin
    slugs of the two names.

    :param zone_name: name to compare against.
    :param fliggy_zones: candidate zone names.
    :return: the candidate with the highest ratio (the first one on ties),
        or ``None`` when there are no candidates.
    """
    if not fliggy_zones:
        return None
    # The slug of zone_name is the same for every candidate: compute it once.
    zone_name_py = pypinyin.slug(unicode(zone_name), separator='')
    best_ratio = -1
    best_zone = None
    for fliggy_zone in fliggy_zones:
        fliggy_zone_py = pypinyin.slug(unicode(fliggy_zone), separator='')
        ratio = Levenshtein.ratio(zone_name_py, fliggy_zone_py)
        if ratio > best_ratio:
            best_ratio = ratio
            best_zone = fliggy_zone
    return best_zone
예제 #9
0
def test_simple_seg():
    """Check how slug() treats text that mixes Chinese with other scripts."""
    cases = [
        ('北京abcc', 'be3i ji1ng abcc'),
        ('你好にほんごРусский язык', 'ni3 ha3o にほんごРусский язык'),
    ]
    for source, expected in cases:
        assert slug([source], style=TONE2, separator=' ') == expected

    # With an errors handler that returns None only 'ni3 ha3o' is expected.
    converted = slug('你好にほんごРусский язык',
                     style=TONE2,
                     separator=' ',
                     errors=lambda x: None)
    assert converted == 'ni3 ha3o'
예제 #10
0
def test_simple_seg():
    """slug() output for mixed-script input, with and without an errors hook."""
    expected_by_input = {
        '北京abcc': 'be3i ji1ng abcc',
        '你好にほんごРусский язык': 'ni3 ha3o にほんごРусский язык',
    }
    for text in expected_by_input:
        result = slug([text], style=TONE2, separator=' ')
        assert result == expected_by_input[text]

    mixed = '你好にほんごРусский язык'
    # The handler returns None for whatever slug() passes to it.
    assert slug(mixed, style=TONE2, separator=' ',
                errors=lambda x: None) == 'ni3 ha3o'
예제 #11
0
 def showlist(self):
     """Show the search results for the text in ``self.lineEdit``.

     The list widget is cleared and refilled from ``Data.urllist``.  An empty
     keyword shows every item; otherwise an item is shown when the lowercased
     keyword occurs in the lowercased item, in its full pinyin, or in its
     first-letter pinyin.
     """
     keyword = self.lineEdit.text().strip()
     self.listWidget.clear()  # empty the display box
     if not keyword:
         for entry in Data.urllist:
             self.listWidget.addItem(entry)  # empty keyword: load everything
         return

     needle = keyword.lower()
     for entry in Data.urllist:
         lowered = entry.lower()
         # Case-insensitive match first, then the two pinyin forms.
         if needle not in lowered:
             if needle not in pypinyin.slug(lowered, separator=''):
                 if needle not in pypinyin.slug(lowered,
                                                style=Style.FIRST_LETTER,
                                                separator=''):
                     continue
         self.listWidget.addItem(entry)  # load a search hit
예제 #12
0
 def post(self, request):
     """
     DingTalk login.

     Exchanges the temporary auth code from the request body for the
     DingTalk user info, looks up (or creates) the matching local user and
     returns that user's serialized token.

     :param request: request whose JSON body contains ``code``.
     :return: ``JsonResponse`` with code "999999" and the token data on
         success, or code "999998" on any failure.
     """
     data = JSONParser().parse(request)
     # Millisecond timestamp, used both for the signature and as a parameter.
     timestamp = str(int(time.time() * 1000))
     # NOTE(review): the accessKey is hard-coded in the URL - consider moving
     # it to configuration.
     response = requests.post(
         url=
         'https://oapi.dingtalk.com/sns/getuserinfo_bycode?signature={}&timestamp={}&accessKey=dingoapfjxo0dzezwe47sy'
         .format(parse.quote(signature(timestamp)), timestamp),
         json={"tmp_auth_code": data['code']})
     try:
         response = response.json()
         if response["errcode"] == 0:
             try:
                 # Existing user: resolve the profile by unionid, then the user.
                 user = UserProfile.objects.get(
                     unionid=response['user_info']['unionid'])
                 user = User.objects.get(id=user.user_id)
             except Exception as e:
                 # Any lookup failure is treated as "first login": create the
                 # user and its profile.
                 # NOTE(review): every new account gets the password 'admin'.
                 password = make_password('admin')
                 with transaction.atomic():
                     try:
                         # Username is the pinyin of the DingTalk nickname.
                         user = User.objects.create(
                             username=pypinyin.slug(
                                 response['user_info']['nick'],
                                 separator=''),
                             password=password,
                             first_name=response['user_info']['nick'])
                     except Exception as e:
                         # Retry with a random numeric suffix appended to the
                         # username.
                         # NOTE(review): the retry runs inside the same atomic
                         # block as the failed create - confirm the database
                         # still accepts queries at this point.
                         user = User.objects.create(
                             username=pypinyin.slug(
                                 response['user_info']['nick'],
                                 separator='') +
                             str(random.randint(0, 9999)),
                             password=password,
                             first_name=response['user_info']['nick'])
                     UserProfile.objects.create(
                         user=user,
                         openId=response['user_info']['openid'],
                         unionid=response['user_info']['unionid'])
             data = TokenSerializer(Token.objects.get(user=user)).data
             data["userphoto"] = '/file/userphoto.jpg'
             return JsonResponse(data=data, code="999999", msg="成功")
         else:
             return JsonResponse(code="999998", msg='登录失败!')
     # NOTE(review): the bare except turns every error into the generic
     # "login failed" response.
     except:
         return JsonResponse(code="999998", msg='登录失败!')
예제 #13
0
 def showlist(self, event):
     """Refill ``self.listbox`` with the entries matching the keyword box.

     ``event`` is accepted but not used.  An empty keyword lists every entry
     of ``self.urllist``; otherwise an entry is listed when the lowercased
     keyword occurs in the lowercased entry, in its full pinyin, or in its
     first-letter pinyin.
     """
     keyword = self.keywdbox.get().strip()
     self.listbox.delete(0, END)
     if not keyword:
         for entry in self.urllist:
             self.listbox.insert(END, entry)  # empty keyword: load everything
         return

     needle = keyword.lower()
     for entry in self.urllist:
         lowered = entry.lower()
         haystacks = (
             lowered,
             pypinyin.slug(lowered, separator=''),
             pypinyin.slug(lowered, style=Style.FIRST_LETTER, separator=''),
         )
         if any(needle in haystack for haystack in haystacks):
             self.listbox.insert(END, entry)  # load a search hit
예제 #14
0
def str_to_pinyin(chi_characters):
    '''
    Return the abbreviated and the full pinyin of a Chinese string.

    Input that is not ``unicode`` is decoded as UTF-8 first.  The result is a
    tuple ``(initials, full)``, e.g. "zs" and "zhangsan" for 张三.
    '''
    text = chi_characters
    if not isinstance(text, unicode):
        text = text.decode('utf-8')

    initials = pypinyin.slug(text, style=pypinyin.FIRST_LETTER, separator='')
    full = pypinyin.slug(text, separator='')
    return initials, full
예제 #15
0
파일: Utils.py 프로젝트: yswhynot/vophoto
def update_user_photo_indexer(user_id, image):
    """Add an image to the user's tag index and persist the index.

    The index maps the pinyin of each tag to a list of image names.  It is
    taken from ``mc`` when present there, otherwise loaded from the user's
    ``indexer.dat`` (or started empty when that file does not exist).  The
    updated index is pickled back to the file, stored in ``mc`` and returned.

    NOTE(review): ``pickle.load`` must only ever see files this application
    wrote itself.
    """
    filename = get_user_path(user_id) + "/" + "indexer.dat"
    indexer = mc.get(user_id)
    if not indexer:
        if os.path.exists(filename):
            with open(filename, 'rb') as fp:
                indexer = pickle.load(fp)
        else:
            indexer = {}

    if indexer is None:
        return

    tags = image['tags']
    image_name = image['image_name']
    for tag in tags:
        indexer.setdefault(pypinyin.slug(tag), []).append(image_name)

    with open(filename, 'wb') as fp:
        pickle.dump(indexer, fp)

    mc.set(user_id, indexer)
    return indexer
def correct_txt_with_info(txt, dic):
    """
    Correct the words of a text that need fixing, then report how many
    domain terms the text contains and which ones.

    The text is scanned from left to right; at each position the pinyin of
    the remaining text is looked up in a trie built from ``dic`` and the value
    of the longest matching prefix, if any, replaces the text at that position.

    :param txt: the text to correct
    :param dic: the standard vocabulary as a dictionary (used to build a
        ``pytrie.SortedStringTrie``)
    :return: dict with the corrected text ('text'), the number of domain
        terms found ('num') and the list of those terms ('word')
    """
    txt_num = len(txt)  # length of the text
    word_num = 0  # number of domain terms, starts at 0
    word_list = []  # list of domain terms, starts empty
    value = 0  # slice start; scanning begins at the start of the text
    txt1 = txt
    vocabulary_trie = pytrie.SortedStringTrie(dic)  # trie used to match against the pinyin dictionary

    while value < txt_num:
        tem_txt = txt[value:txt_num]  # remaining substring
        tem_py = pypinyin.slug(tem_txt)  # pinyin of the substring
        """开始处理字串,把字串作为参数,进行匹配"""
        # 'false' (a string) is the sentinel returned when no prefix matches.
        result_match = vocabulary_trie.longest_prefix_value(tem_py, default='false')
        if result_match == 'false':
            value = value + 1
            continue
        else:
            # The replaced span has as many characters as the matched value.
            need_change = tem_txt[0:len(result_match)]  # the word that needs correcting
            # NOTE(review): str.replace substitutes every occurrence in txt1,
            # not only the one at the current position - confirm intended.
            txt1 = txt1.replace(need_change, result_match)
            word_num = word_num + 1  # one more domain term
            word_list.append(result_match)  # remember the domain term
            # NOTE(review): an empty matched value would leave `value`
            # unchanged and the loop would not advance - confirm dic cannot
            # contain empty values.
            value = value + len(need_change)

    result = {'text': txt1, 'num': word_num, 'word': word_list}
    return result
예제 #17
0
 def create(self, validated_data):
     """Create a product with its pictures and initial storage record.

     ``storage`` and ``pictures`` are removed from ``validated_data`` and
     ``name_acronym`` (the separator-less pinyin of ``name``) is added before
     the product is created.  All writes happen inside one atomic block; on
     any exception the savepoint is rolled back and the exception re-raised.

     :param validated_data: dict containing at least ``storage``,
         ``pictures`` and ``name``.
     :return: the object returned by ``create_product``.
     """
     user = self.context["self"].current_user
     storage = validated_data.pop("storage")
     product_pictures = validated_data.pop("pictures")
     validated_data["name_acronym"] = slug(validated_data["name"],
                                           separator="")
     with transaction.atomic():
         # Create a savepoint
         save_id = transaction.savepoint()
         try:
             # Add the product
             product = create_product(validated_data, user.id)
             # Add the product's carousel pictures
             create_product_pictures(product.id, product_pictures)
             # Change the storage and create the storage change record
             update_product_storage_and_create_record(
                 product,
                 user.id,
                 storage,
                 ProductStorageRecordType.MANUAL_MODIFY,
                 ProductStorageRecordOperatorType.STAFF,
             )
         except Exception as e:
             print(e)
             # Roll back to the savepoint
             transaction.savepoint_rollback(save_id)
             raise
         # Commit the savepoint
         transaction.savepoint_commit(save_id)
     return product
예제 #18
0
 def generate_user(self, university=None, gender="女"):
     """Register a randomly generated user and return a summary of it.

     Name, avatar and mail postfix are drawn at random from the instance's
     pools (``family_names``, ``girl_names``/``boy_names``,
     ``girl_imgs``/``boy_imgs``, ``mail_postfix``).  The mail address is the
     pinyin of the name followed by a random 10-digit number.

     :param university: school to use; a random entry of ``university_211``
         when not given.
     :param gender: "女" selects the girl pools, anything else the boy pools.
     :return: dict with ``user`` (mail), ``icon``, ``real_name``,
         ``university`` and ``uid`` (the value returned by
         ``self.um.register_user_sync``).
     """
     # random.choice replaces int(random.uniform(0, len(seq))), which could
     # produce len(seq) itself and raise IndexError.
     family_name = random.choice(self.family_names)
     if gender == "女":
         name = family_name + random.choice(self.girl_names)
         pic = random.choice(self.girl_imgs)
     else:
         name = family_name + random.choice(self.boy_names)
         pic = random.choice(self.boy_imgs)
     pinyin = pypinyin.slug(name.decode('utf-8'), separator='')
     mail = '%s%d@%s.com' % (pinyin, random.uniform(
         1000000000, 9999999999), random.choice(self.mail_postfix))
     if not university:
         school = random.choice(university_211)
     else:
         school = university
     # NOTE(review): psw is not defined in this method; presumably a
     # module-level default password - confirm.
     uid = self.um.register_user_sync(mail, psw, pic, name, school, '',
                                      name, gender)
     return {
         'user': mail,
         'icon': pic,
         'real_name': name,
         'university': school,
         'uid': uid
     }
예제 #19
0
def data_process(input_df):
    '''
    Build one record per person with a romanised name and the institutions.

    :param input_df: dataframe,columns=['person_id', 'name', 'rankaff_name', 'rankaff_id', 'ins_en', 'aff_id']
    :return: [{'person_id':1234564, 'name':'liu bo', 'ins':['fudan university', 'xx university', 'xxx university'],
                'ins_id':[111, 222, 333], 'name_zh':'刘博'}, {...}]
    '''
    records = []
    for person_id, person_rows in input_df.groupby('person_id'):
        record = {
            'person_id': person_id,
            'ins': list(person_rows['ins_en']),
            'ins_id': list(person_rows['aff_id']),
        }

        name_zh = person_rows.iloc[0]['name']
        record['name_zh'] = name_zh
        syllables = slug(name_zh, separator='-').replace('v', 'ü').split('-')

        if name_zh[:2] in compound_surname:
            # Two-character surname: the first two syllables form the surname.
            surname = ''.join(syllables[:2]).capitalize()
            given_name = ''.join(syllables[2:]).capitalize()
        elif name_zh[:1] in polyphony_surname:
            # Surname with several readings: take the reading from the table.
            surname = polyphony_surname[name_zh[:1]]
            given_name = ''.join(syllables[1:]).capitalize()
        else:
            # Ordinary single-character surname.
            surname = ''.join(syllables[:1]).capitalize()
            given_name = ''.join(syllables[1:]).capitalize()
        record['name'] = surname + ' ' + given_name

        records.append(record)

    return records
예제 #20
0
파일: Utils.py 프로젝트: yswhynot/vophoto
def update_user_photo_indexer(user_id, image):
    """Record ``image`` under the pinyin of each of its tags.

    The per-user index (pinyin tag -> list of image names) comes from ``mc``
    or, when ``mc`` has nothing usable, from the user's ``indexer.dat``; a
    missing file starts an empty index.  After the update the index is
    written back to the file, cached in ``mc`` and returned.

    NOTE(review): the index file is read with ``pickle.load``; it must never
    come from an untrusted source.
    """
    filename = get_user_path(user_id) + "/" + "indexer.dat"
    cached = mc.get(user_id)
    if cached:
        indexer = cached
    elif not os.path.exists(filename):
        indexer = {}
    else:
        with open(filename, "rb") as stored:
            indexer = pickle.load(stored)

    if indexer is None:
        return

    tags = image["tags"]
    image_name = image["image_name"]

    for tag in tags:
        key = pypinyin.slug(tag)
        names = indexer.get(key, [])
        names.append(image_name)
        indexer[key] = names

    with open(filename, "wb") as stored:
        pickle.dump(indexer, stored)

    mc.set(user_id, indexer)
    return indexer
예제 #21
0
 def generate_user(self, university=None, gender="女"):
     """Create and register a random user, returning its key attributes.

     The name is a random family name plus a random given name from the girl
     or boy pool; avatar and mail postfix are drawn from the matching pools.
     The mail address is the pinyin of the name plus a random 10-digit
     number.

     :param university: school to assign; a random ``university_211`` entry
         when falsy.
     :param gender: "女" uses the girl pools, any other value the boy pools.
     :return: dict with ``user``, ``icon``, ``real_name``, ``university``
         and ``uid`` (result of ``self.um.register_user_sync``).
     """
     # int(random.uniform(0, len(seq))) may hit len(seq) when uniform()
     # returns its upper bound; random.choice cannot go out of range.
     if gender == "女":
         given_names, avatars = self.girl_names, self.girl_imgs
     else:
         given_names, avatars = self.boy_names, self.boy_imgs
     name = random.choice(self.family_names) + random.choice(given_names)
     pic = random.choice(avatars)
     pinyin = pypinyin.slug(name.decode('utf-8'), separator='')
     mail = '%s%d@%s.com' % (pinyin, random.uniform(1000000000, 9999999999),
                             random.choice(self.mail_postfix))
     school = university if university else random.choice(university_211)
     # NOTE(review): psw is not defined in this method; presumably a
     # module-level default password - confirm.
     uid = self.um.register_user_sync(mail, psw, pic, name, school, '', name, gender)
     return {
         'user': mail,
         'icon': pic,
         'real_name': name,
         'university': school,
         'uid': uid
     }
예제 #22
0
파일: genhtml.py 프로젝트: yunfzhai/mdblog
def str2pinyin(hans, nameset, style=pypinyin.FIRST_LETTER):
    """Convert a string to pinyin (first letters only by default).

    While the result is already contained in ``nameset``, an increasing
    number (2, 3, ...) is appended to the current candidate, so suffixes
    accumulate ("ab" -> "ab2" -> "ab23").
    """
    candidate = pypinyin.slug(hans, style=style, separator="")
    suffix = 2
    while candidate in nameset:
        candidate = candidate + str(suffix)
        suffix = suffix + 1
    return candidate
예제 #23
0
def get_classify_dir_path(category_name):
    """Return the output directory for a category.

    The directory is chosen from the first-letter pinyin of
    ``category_name``: the marker it contains ('zpdrycsq', 'wawq', 'xqfx' or
    'yczp', checked in that order) selects the sub-directory, and a trailing
    'JH' selects the 'jh/' variant instead of 'all/' where one exists.
    Without any marker the base directory is returned.
    """
    initials = pypinyin.slug(category_name,
                             separator='',
                             style=Style.FIRST_LETTER)
    base = rootDir + 'p**n' + os.sep
    is_jh = initials.endswith('JH')

    if 'zpdrycsq' in initials:
        return base + ('jh/zpdr_ycsq_jh/' if is_jh else 'all/zpdr_ycsq_all/')
    if 'wawq' in initials:
        return base + ('jh/wawq_jh/' if is_jh else 'all/wawq_all/')
    if 'xqfx' in initials:
        return base + 'all/xqfx/'
    if 'yczp' in initials:
        return base + ('jh/yczp_jh/' if is_jh else 'all/yczp_all/')
    return base
예제 #24
0
def convertpinyin(list):
    """Return the separator-less pinyin of every table-head value in ``list``."""
    return [pypinyin.slug(value, separator='') for value in list]
예제 #25
0
def get_correct_trans_str(match_str):
    """Return the localization key (prefixed with 'k') for ``match_str``.

    A string already present in ``bundle_data_dict`` reuses its stored key.
    Otherwise a new key is built from ``string_name_head`` and the pinyin of
    the string, made unique with a numeric suffix, and registered in
    ``bundle_data_dict`` (in both directions) and ``string_key_list_add``.
    ``match_str`` is always added to ``string_name_list``.
    """
    # Check whether this key is already in the localization file
    trans_res_str = ''
    if match_str in bundle_data_dict.keys():
        trans_res_str = 'k' + str(bundle_data_dict.get(match_str)).strip('"')
    else:
        # Add it to the string keys
        trans_pinyin_str = slug(match_str, errors='ignore', separator='')
        # "<n>PH" + English/pinyin summary, where n is the number of '%@'
        # placeholders (3PH for three placeholders, and so on)
        ph_num = str(match_str).count('%@')
        if ph_num > 0:
            trans_pinyin_str = str(ph_num) + 'PH' + trans_pinyin_str
        # Truncate so that head + pinyin part is at most 50 characters.
        if len(trans_pinyin_str) > 50 - len(string_name_head):
            trans_pinyin_str = trans_pinyin_str[0:50 - len(string_name_head)]
        trans_res_str = '"' + string_name_head + trans_pinyin_str + '"'
        # Append 1, 2, ... until the quoted key is not taken yet.
        index = 0
        while (trans_res_str in bundle_data_dict.keys()):
            index += 1
            trans_res_str = '"' + string_name_head + trans_pinyin_str + str(
                index) + '"'
        # Register the mapping in both directions (key -> text, text -> key).
        bundle_data_dict[trans_res_str] = match_str
        bundle_data_dict[match_str] = trans_res_str
        string_key_list_add[trans_res_str] = match_str
        trans_res_str = 'k' + trans_res_str.strip('"')

    string_name_list.add(match_str)
    return trans_res_str
예제 #26
0
 def create_user_by_employee(self, employee_id, password, active=True):
     """
     Create an Odoo user from an employee.
     Requires pypinyin:  pip install pypinyin

     The login is built as <full pinyin of the name><part of the mobile
     number>@<mail domain>.  If a user whose login resembles that name
     already exists, the employee is linked to the user with exactly that
     login; otherwise a new user is created, notified through a DingTalk work
     message and linked to the employee.  Nothing happens when the employee
     is not found.

     :param employee_id: id of the ``hr.employee`` record.
     :param password: initial password of the new user.
     :param active: value of the new user's ``active`` field.
     """
     employee = self.env['hr.employee'].sudo().search([('id', '=',
                                                        employee_id)])
     if employee:
         # Login format: full pinyin of the name + last four digits of the
         # mobile number @ company mail domain
         email_name1 = pypinyin.slug(employee.name, separator='')  # full pinyin
         # email_name1 = pypinyin.slug(employee.name, style=Style.FIRST_LETTER, separator='') # first letters
         # Everything from index 7 on; the last four digits for an 11-digit
         # number - assumes mobile_phone is set and has that length, TODO confirm
         email_name2 = employee.mobile_phone[7:]
         email_name = email_name1 + email_name2
         # A switch could be added later so the administrator can choose
         # between another domain and the company mail domain
         url = self.env['ir.config_parameter'].sudo().get_param(
             'mail.catchall.domain')
         if url:
             email_host = url
         else:
             email_host = 'dingtalk.com'
         email_count = len(
             self.search([('login', 'like', email_name)]).sudo())
         if email_count > 0:
             # NOTE(review): the check above uses 'like' but this lookup uses
             # '='; if only a similar login exists, `user` may be empty -
             # confirm.
             user = self.env['res.users'].sudo().search([
                 ('login', '=', email_name + '@' + email_host)
             ])
             values = {'user_id': user.id}
             employee.sudo().write(values)
         else:
             email = email_name + '@' + email_host
             # Get a name that is not in use yet
             name = employee.name
             name_count = len(self.search([('name', 'like', name)]).sudo())
             if name_count > 0:
                 name = name + str(name_count + 1)
             # Create the Odoo user
             values = {
                 'active': active,
                 "login": email,
                 "password": password,
                 "name": name,
                 'email': employee.work_email,
                 'groups_id': self.env.ref('base.group_user')
             }
             user = self.sudo().create(values)
             # After the first automatic creation, send a DingTalk work
             # notification to the employee
             # NOTE(review): the message contains the initial password in
             # plain text.
             msg = {
                 'msgtype': 'text',
                 'text': {
                     "content":
                     "尊敬的{},欢迎加入odoo,您的登陆名为{},初始登陆密码为{},请登陆后及时修改密码!".format(
                         name, email, password),
                 }
             }
             self.env['dindin.work.message'].sudo().send_work_message(
                 userstr=employee.din_id, msg=msg)
             # After registration, link the employee to the user
             values = {'user_id': user.id}
             employee.sudo().write(values)
예제 #27
0
def trans(value):
    """Convert ``value`` to pinyin and record the conversion.

    The result is stored in ``test_dcit`` under ``value`` and appended to
    ``test_list`` before it is returned.
    """
    converted = pypinyin.slug(value, separator='', strict=False,
                              style=Style.NORMAL)
    test_dcit[value] = converted
    test_list.append(converted)
    return converted
예제 #28
0
 def __jieba_html(self, html):
     """Segment ``html`` with jieba and return the pinyin of its noun-like words.

     Only words tagged 'ns', 'n', 'nt' or 'nz' are kept (part-of-speech
     categories, see https://www.cnblogs.com/adienhsuan/p/5674033.html).
     """
     wanted_flags = ['ns', 'n', 'nt', 'nz']
     return [
         slug(word, separator='')
         for word, flag in pseg.cut(html)
         if flag in wanted_flags
     ]
예제 #29
0
def get_zone_list_from_fliggy_city_pinyin(city_name, file_name):
    """Collect the zones listed for a city in a tab-separated file.

    For every line whose third field, converted to separator-less pinyin,
    equals ``city_name``, the fifth field is collected.

    :param city_name: city name as separator-less pinyin.
    :param file_name: path of the tab-separated file.
    :return: list of the fifth fields of all matching lines, in file order.
        Lines with fewer than five fields (for example blank lines) are
        skipped.
    """
    try:
        source = open(file_name, "rU")
    except ValueError:
        # Newer Python 3 releases reject the 'U' flag; plain text mode
        # already translates newlines there.
        source = open(file_name, "r")

    zone_list = []
    # Read the file once and close it afterwards, instead of reading it
    # fully just to count lines and then fetching each line again.
    with source:
        for line in source:
            if not line.endswith('\n'):
                # linecache, used here previously, returned the last line
                # with a newline appended; keep that output.
                line += '\n'
            field_list = line.split('\t')
            if len(field_list) < 5:
                continue
            if city_name == pypinyin.slug(unicode(field_list[2]),
                                          separator=''):
                zone_list.append(field_list[4])
    return zone_list
예제 #30
0
파일: generate.py 프로젝트: niu2x/blog
def str2pinyin(hans, style=pypinyin.FIRST_LETTER):
    """Convert a string to pinyin; by default only the first letters are used.

    As long as the result is found in ``_pinyin_names``, the next number
    (starting at 2) is appended to it; earlier suffixes are kept.
    """
    result = pypinyin.slug(hans, style=style, separator="")
    counter = 2
    while result in _pinyin_names:
        result = result + str(counter)
        counter = counter + 1
    return result
def addition_pinyin(data, num):
    """Return a list of ``[symptom name, pinyin]`` pairs.

    ``data`` is the list produced by ``str_of_result`` and ``num`` the number
    of its leading entries to convert.
    """
    return [[data[index], pypinyin.slug(data[index])]
            for index in range(num)]
예제 #32
0
def h2p(hanzi):
    """
    Convert Chinese characters to their full pinyin without separators.

    :param hanzi: the Chinese text
    :return: the pinyin string
    """
    # Full spelling; slug(..., style=Style.FIRST_LETTER) would give initials.
    return pypinyin.slug(hanzi, separator='')
예제 #33
0
def str2pinyin(hans, style=pypinyin.FIRST_LETTER):
    """Return the pinyin of ``hans`` (first letters by default), made unique.

    The name is extended with 2, then 3, ... (each appended to the previous
    candidate) until it no longer appears in ``_pinyin_names``.
    """
    name = pypinyin.slug(hans, style=style, separator="")
    next_number = 2
    while name in _pinyin_names:
        name = "".join([name, str(next_number)])
        next_number += 1
    return name
예제 #34
0
def data_process2(input_df):
    '''
    Build one record per person with every possible romanised name.

    The surname is romanised once (compound and polyphonic surnames are
    handled specially); for the given name every combination of the possible
    readings of its characters is produced.

    :param input_df: dataframe,columns=['person_id', 'name', 'rankaff_name', 'rankaff_id', 'ins_en', 'aff_id'];
        'ins_en' and 'aff_id' may be absent, in which case 'ins' and 'ins_id' are ['']
    :return: [{'person_id':1234564, 'name':['Huang Ka', 'Huang Qia'], 'ins':['fudan university', 'xx university', 'xxx university'],
                'ins_id':[111, 222, 333], 'name_zh':'黄卡'}, {...}]
    '''
    # Test for the columns that are actually read below; testing for 'ins'
    # never matched the documented columns, so the data was always dropped.
    has_institutions = ('ins_en' in input_df.columns
                        and 'aff_id' in input_df.columns)
    input_data = []
    for value, sub_df in input_df.groupby('person_id'):
        row_dict = {'person_id': value}
        if has_institutions:
            row_dict['ins'] = list(sub_df['ins_en'])
            row_dict['ins_id'] = list(sub_df['aff_id'])
        else:
            row_dict['ins'] = ['']
            row_dict['ins_id'] = ['']

        name_zh = sub_df.iloc[0]['name']
        row_dict['name_zh'] = name_zh.strip()
        if name_zh[:2] in compound_surname:
            # Two-character (compound) surname.
            k = 2
            first_name = slug(name_zh[:2], separator='')
        elif name_zh[:1] in polyphony_surname:
            # Surname with several readings: use the reading from the table.
            k = 1
            first_name = polyphony_surname[name_zh[:1]]
        else:
            # Ordinary single-character surname.
            k = 1
            first_name = slug(name_zh[:1], separator='')

        # One list of candidate readings per remaining character.
        last_name = pinyin(name_zh[k:], heteronym=True, style=Style.NORMAL)
        row_dict['name'] = [
            (first_name.capitalize() + ' ' +
             ''.join(x).capitalize()).replace('v', 'ü')
            for x in product(*last_name)
        ]

        input_data.append(row_dict)

    return input_data
예제 #35
0
파일: Utils.py 프로젝트: yswhynot/vophoto
def get_object_keywords(key_words):
    """Return the pinyin of the words tagged as objects.

    Every entry of ``key_words`` is split on "_"; entries with fewer than
    two parts are ignored, and the first part is converted when the second
    part is listed in ``Config.config["object_pos"]``.
    """
    split_words = (entry.split("_") for entry in key_words)
    return [
        pypinyin.slug(parts[0])
        for parts in split_words
        if len(parts) >= 2 and parts[1] in Config.config["object_pos"]
    ]
예제 #36
0
def get_meaningful_keywords(key_words):
    """Return the pinyin of the words whose tag counts as meaningful.

    Entries are split on '_'; those with fewer than two parts are skipped.
    The first part is converted when the second part is contained in
    ``Config.config['meaningful_pos']``.
    """
    selected = []
    for entry in key_words:
        parts = entry.split('_')
        if len(parts) >= 2 and parts[1] in Config.config['meaningful_pos']:
            selected.append(pypinyin.slug(parts[0]))
    return selected
예제 #37
0
def get_face_final(face):
    """Attach the pinyin of the face name to every face record.

    :param face: iterable of strings shaped like ``'face_id,face_name'``.
    :return: list of ``[face_id, face_name, pinyin_of_face_name]`` lists.
        Entries without a name field are skipped.
    """
    face_final = []
    for k in face:
        pair = k.split(',')
        if len(pair) < 2:
            # No name to convert.
            continue
        name_pinyin = pypinyin.slug(pair[1])
        if len(pair) > 2:
            pair[2] = name_pinyin
        else:
            # A two-field record has no third slot yet: assigning pair[2]
            # would raise IndexError.
            pair.append(name_pinyin)
        face_final.append(pair)
    return face_final
예제 #38
0
def test_update():
    """Check the TONE2 slug of several phrases containing 便宜."""
    cases = (
        ('便宜', 'pia2n yi2'),
        ('便宜从事', 'bia4n yi2 co2ng shi4'),
        ('便宜施行', 'bia4n yi2 shi1 xi2ng'),
        ('便宜货', 'pia2n yi2 huo4'),
        ('贪便宜', 'ta1n pia2n yi2'),
        ('讨便宜', 'ta3o pia2n yi2'),
        ('小便宜', 'xia3o pia2n yi2'),
        ('占便宜', 'zha4n pia2n yi2'),
    )
    for phrase, expected in cases:
        assert slug([phrase], style=TONE2, separator=' ') == expected
예제 #39
0
파일: Utils.py 프로젝트: yswhynot/vophoto
def get_human_names(raw):
    """Return pinyin slugs of the tokens in ``raw`` tagged as human names.

    ``raw`` is split on single spaces into tokens, and each token is split
    on ``"_"``.  A token contributes the slug of its first piece when it has
    at least two pieces and the second one is listed in
    ``Config.config["human_name_pos"]``.
    """
    names = []
    for token in raw.split(" "):
        parts = token.split("_")
        if len(parts) >= 2 and parts[1] in Config.config["human_name_pos"]:
            names.append(pypinyin.slug(parts[0]))
    return names
예제 #40
0
파일: Utils.py 프로젝트: yswhynot/vophoto
def translate_tags(tags):
    """Map each tag to the entries stored for its pinyin slug.

    The slug -> entries table is read from ``mc`` under the key
    ``"cv_tags"``; when that lookup is falsy the table is rebuilt with
    ``load_cv_tags()`` and stored back.  Every tag is slugged with
    ``pypinyin.slug`` and the entries found for each slug are concatenated
    in order.  Slugs that are not in the table contribute nothing.
    """
    Logger.debug("translate_tags in")
    tag_table = mc.get("cv_tags")
    if not tag_table:
        Logger.debug("translate_tags load from file")
        tag_table = load_cv_tags()
        mc.set("cv_tags", tag_table)

    Logger.debug("translate_tags 3")
    # Slug every tag first, then look the slugs up.
    slugs = [pypinyin.slug(word) for word in tags]
    return [hit for key in slugs for hit in tag_table.get(key, [])]
예제 #41
0
def test_update():
    """Check TONE2 slugs for 便宜 phrases and for characters from three CJK ranges."""
    expected_by_hans = {
        '便宜': 'pia2n yi2',
        '便宜从事': 'bia4n yi2 co2ng shi4',
        '便宜施行': 'bia4n yi2 shi1 xi2ng',
        '便宜货': 'pia2n yi2 huo4',
        '贪便宜': 'ta1n pia2n yi2',
        '讨便宜': 'ta3o pia2n yi2',
        '小便宜': 'xia3o pia2n yi2',
        '占便宜': 'zha4n pia2n yi2',
        '\u3400': 'qiu1',  # CJK Extension A: [3400-4DBF]
        '\u4E00': 'yi1',   # CJK basic block: [4E00-9FFF]
        '\uFA29': 'da3o',  # CJK Compatibility: [F900-FAFF]
    }
    for hans in expected_by_hans:
        actual = slug([hans], style=TONE2, separator=' ')
        assert actual == expected_by_hans[hans]
예제 #42
0
파일: Utils.py 프로젝트: yswhynot/vophoto
def load_cv_tags():
    """Build the slug -> tags lookup from ``category.txt`` next to this module.

    Each usable line has the form ``tag:word1-word2-...``.  Every word is
    converted with ``pypinyin.slug`` and mapped to the list of tags whose
    line mentions it.  Lines without a ``":"`` (for example blank lines) are
    skipped.

    Returns:
        dict: slug -> list of tag names; empty when the file does not exist.
    """
    path = os.path.dirname(os.path.realpath(__file__)) + "/category.txt"
    if not os.path.exists(path):
        return {}

    cv_tags = {}
    # The context manager closes the file even when parsing raises.
    with open(path, encoding="utf-8") as category_file:
        for line in category_file:
            items = line.strip().split(":")
            if len(items) < 2:
                # Nothing to index on a blank or malformed line.
                continue
            tag = items[0]
            for word in items[1].split("-"):
                cv_tags.setdefault(pypinyin.slug(word), []).append(tag)

    return cv_tags
예제 #43
0
def test_slug():
    """slug() gives 'zhong-xin' for 中心 with and without heteronym=True."""
    hans = '中心'
    for options in ({}, {'heteronym': True}):
        assert slug(hans, **options) == 'zhong-xin'
예제 #44
0
def get_tag_from_rawlocation(key_location):
    """Return the pinyin slug of every item of ``key_location`` after the first two."""
    return [pypinyin.slug(tag) for tag in key_location[2:]]
예제 #45
0
def getPinyin(word):
    """Return the TONE2 pinyin of ``word`` with syllables separated by spaces.

    Note: heteronyms (characters with several readings) are not handled.
    """
    spaced = pypinyin.slug(word, style=pypinyin.TONE2).replace("-", " ")
    # Collapse double spaces twice, so runs of up to four spaces become one.
    for _ in range(2):
        spaced = spaced.replace("  ", " ")
    return spaced
예제 #46
0
    def post(self):
        """Handle a photo search request and write one JSON response.

        Reads the ``user_id``, ``tag``, ``loc`` and ``token`` request
        arguments, checks the token against the stored user, extracts
        keywords, person names, object tags and time information from the
        tag string, and looks up matching images.

        The response is written exactly once, in the ``finally`` clause:
        ``{'status': False}`` when the request is rejected or an exception
        occurs, otherwise ``{'status': True, 'image': ...}``.
        """
        result = {'status': False}
        Logger.debug('in search')
        try:
            user_id = self.get_argument('user_id', '')
            # Example tag value (space-separated word_POS tokens):
            #   '我_r 想_v 找_v 今年_nt 在_p 兵马俑_ns 的_u 照片_n'
            rawTag = self.get_argument('tag', '')
            rawLocation = self.get_argument('loc', '')
            token = self.get_argument('token', '')
            user = MongoHelper.get_user_by_id(user_id)
            # NOTE(review): this logs the auth token; consider redacting it.
            Logger.info('user_id:' + user_id + ', raw tag:' + rawTag + ', raw location:' + rawLocation + ', token:' + token)

            # Compare by value: `is not` tests object identity, which is
            # not a reliable equality check for strings.  A missing user is
            # rejected the same way instead of raising TypeError.
            if user is None or token != user['token']:
                Logger.debug('token wrong')
                return

            if user_id == '' or rawTag == '':
                Logger.debug('user id or tag null')
                return

            # rawTag is non-empty here, so split() yields at least one item.
            key_words = rawTag.split(' ')

            meaningful = Utils.get_meaningful_keywords(key_words)
            if not meaningful:
                Logger.debug('meaningful key words none')
                return
            Logger.info('meaningful:' + str(meaningful))

            if rawLocation:
                key_location = rawLocation.split(',')
                latitude = float(key_location[0])
                longitude = float(key_location[1])
                Logger.info('Latitude: ' + str(latitude) + ', Longitude: ' + str(longitude))
            else:
                latitude = None
                longitude = None
                Logger.info('No location info in search')

            face_name = Utils.get_human_names(rawTag)
            Logger.info('face name:' + str(face_name))
            face_id = list(MongoHelper.get_similar_persons(user_id, face_name))
            Logger.info('Similar person face id:' + str(face_id))
            if face_id:
                meaningful.extend(face_id)
                Logger.info('meaningful_add_face_id:' + str(meaningful))

            Logger.debug("before cv: " + str(key_words))
            object_name = Utils.get_object_keywords(key_words)
            Logger.debug("before cv: " + str(object_name))
            cv_tags = Utils.translate_tags(object_name)
            Logger.debug("after cv: " + str(cv_tags))
            if cv_tags:
                meaningful.extend(cv_tags)
                Logger.debug('meaningful_add_cv_tag:' + str(meaningful))

            processed_time = NLPTimeConvertor.time_api(rawTag, user_id)
            Logger.debug('time api return:' + str(processed_time))

            image = []
            if processed_time[0]:
                image = Utils.get_image_by_time(user_id, processed_time[0])
            if processed_time[1]:
                meaningful.append(pypinyin.slug(processed_time[1]))
            if meaningful:
                image = Utils.get_images_by_tag(user_id, meaningful, image)
            image = Utils.sort_by_location(user_id, latitude, longitude, image)

            Logger.info('returned image:' + str(image))
            result['status'] = True
            result['image'] = image
            Logger.debug('result: ' + str(result))

        finally:
            # Single exit point for the response.  Early returns above must
            # not write themselves, or the body would be emitted twice.
            self.write(json.dumps(result))
예제 #47
0
파일: autocode.py 프로젝트: sijunlv/WX
def process():
    """Derive one identifier per field and pass both lists to ``make``.

    The fields come from ``get_field_list()``.  Each one is turned into an
    identifier by the pipeline below; if two fields end up with the same
    identifier, the duplicates are printed and an ``Exception`` is raised.

    NOTE(review): Python 2 only -- this relies on ``map`` returning a list
    (``len(pinyins)``), on ``filter`` over a string returning a string
    (``.replace`` is called on its result), and on the ``print`` statement.
    """
    global nummap
    global SEP
    fields = get_field_list()
    # Pipeline, innermost map first:
    #   1. decode the field from UTF-8 and slug it with SEP as separator;
    #   2. if the slug starts with a digit, replace that digit with
    #      nummap[digit] + SEP (presumably a spelled-out number -- confirm);
    #   3. keep only alphanumeric characters and SEP;
    #   4. collapse '__' into '_' (assumes SEP is '_' -- TODO confirm).
    # NOTE(review): x[0] raises IndexError if a slug is empty.
    pinyins = map(lambda x: x.replace('__', '_'), map(lambda x: filter(lambda x: x if x.isalnum() or x == SEP else '', x), map(lambda x: nummap[x[0]] + SEP + x[1:] if x[0].isdigit() else x, map(lambda x: pypinyin.slug(x.decode('utf-8'), separator=SEP), fields))))
    set_ = set(pinyins)
    # Fewer distinct names than names means at least one collision.
    if len(pinyins) != len(set_):
        # Count the occurrences of every name to report the repeated ones.
        dict_ = dict(zip(list(set_), [0] * len(set_)))
        for pinyin in pinyins:
            dict_[pinyin] += 1
        for key in dict_.keys():
            if dict_[key] > 1:
                print 'the same name: %s' % key
        raise Exception('variables having the same name')
    make(fields, pinyins)
예제 #48
0
파일: utils.py 프로젝트: Y-Lab/Y-System
def to_pinyin(hans, initials=False):
    """Convert ``hans`` to pinyin with no separator between syllables.

    With ``initials`` set, only the first letter of each syllable is kept
    (``Style.FIRST_LETTER``); otherwise ``Style.NORMAL`` is used.  The
    conversion runs with ``errors='ignore'``.
    """
    chosen_style = Style.FIRST_LETTER if initials else Style.NORMAL
    return slug(hans=hans, style=chosen_style, separator='', errors='ignore')
예제 #49
0
# -*- coding: utf-8 -*-
import mysql.connector;
import csv;
import codecs;
from collections import OrderedDict;
import re;
import time;
from pypinyin import pinyin,lazy_pinyin;
import pypinyin

# Back-fill the `pinyin` column of every client row from its Chinese name.
# NOTE(review): connection credentials are hard-coded; consider loading them
# from configuration or the environment.
updateClient = "UPDATE client set pinyin=%s where id=%s"

cnx = mysql.connector.connect(user='******', password='******', host='localhost', database='wholeren', charset='utf8')
try:
    cursor = cnx.cursor()
    try:
        cursor.execute('select chineseName,id from client;')
        # fetchall() materialises the rows, so the cursor can be reused for
        # the updates inside the loop.
        for chinese_name, client_id in cursor.fetchall():
            if chinese_name is None:
                continue
            p = pypinyin.slug(unicode(chinese_name), style=pypinyin.NORMAL, separator='')
            cursor.execute(updateClient, (p, client_id))
            print(p)
        # Commit only after every row was processed without error.
        cnx.commit()
    finally:
        cursor.close()
finally:
    # Always release the connection, even when a query fails.
    cnx.close()
예제 #50
0
def test_slug():
    hans = u'中心'
    assert slug(hans) == 'zhong-xin'