def filter_name(surname, gender, attr):
    """
    Print up to five candidate names that pass the gender, name-word and
    score filters, together with the source text of each name.

    Candidates are visited in random order and each one at most once, so the
    search always terminates, even when fewer than five names qualify.

    :param surname: surname of name.
    :param gender: gender of name; compared with the upper-cased first letter
        of the label returned by ``ngender.guess``.
    :param attr: attribute list of name.
    :return: None
    """
    # Local import: only the bare ``randint`` name is known to be in scope at
    # file level.
    import random

    max_matches = 5
    sur_type = 1 if len(surname) == 1 else 2
    db_obj = DBOP()
    # Get all match words
    candidates = list(db_obj.get_wuxing_name(attr) or ())
    # The original drew ``randint(0, len(name_tuple))``, whose inclusive upper
    # bound could index one past the end. Shuffling also avoids repeated
    # suggestions and the endless loop when nothing matches.
    random.shuffle(candidates)

    match_count = 0
    for name_info in candidates:
        if match_count == max_matches:
            break
        name_id, name = name_info[0], name_info[1]
        full_name = surname + name

        # Match gender and general name word.
        if gender != ngender.guess(full_name)[0][0].upper():
            continue
        if not db_obj.match_name_word(name):
            continue
        if not name_score(full_name, sur_type):
            continue

        print("[-] 候选名字:%s" % full_name)
        match_count += 1
        name_source = db_obj.get_name_source(name_id)
        print("[-] 名字出处:")
        print(name_source[2])
        print(name_source[1] + '(' + name_source[0] + ')')
        print(name_source[3])
        print('\n')
def main(xml):
    """Build the reply text for an incoming message.

    The text of the ``Content`` child of ``xml`` is searched for a keyword
    ('垃圾', '猜性别', '拼音', '翻译', checked in that order). Everything after the
    keyword plus one following character is passed to the matching helper.
    Messages without any keyword are passed to ``autoReply`` unchanged.

    :param xml: object whose ``find("Content").text`` is the message text.
    :return: the reply produced by the selected helper.
    """
    content = xml.find("Content").text

    if '垃圾' in content:
        query = content[content.find('垃圾') + 3:]
        print(query)
        return whatisit(query)

    if '猜性别' in content:
        person = content[content.find('猜性别') + 4:]
        if ngender.guess(person)[0] == 'male':
            verdict = "一个帅气的男人!! "
        else:
            verdict = "一个漂亮的仙女~~~"
        reply = "(仅供娱乐) 我猜" + person + "是" + verdict
        print(reply)
        return reply

    if "拼音" in content:
        query = content[content.find('拼音') + 3:]
        print(query)
        return pingyin(query)

    if "翻译" in content:
        query = content[content.find('翻译') + 3:]
        print(query)
        return fanyi(query)

    return autoReply(content)
def _decode_secret_phone(encoded):
    """Turn the ``&#x....;`` entity run of a 'secret' span into a digit string."""
    codes = [int(sub('&#x', '0x', entity), 16)
             for entity in encoded.split(';')[:-1]]
    # Every digit is stored as an offset from the first code point, which
    # itself stands for the digit 1.
    return ''.join([str(code - codes[0] + 1) for code in codes])


def get_name_cell_phone(req, x):
    '''
    Extract the contact name and the obfuscated mobile number from a page.

    Contacts that ``ngender`` classifies as female with a probability of 0.7
    or more are dropped. When the guess itself fails (the original notes that
    non-Chinese names raise), the contact is kept.

    :param req: HTML page as a str.
    :param x: directory entry; ``x[2]`` is appended as the region and ``x[1]``
        as the company name.
    :return: the list of contact-name text nodes extended with
        ``[phone + '\\t', x[2], x[1]]``, or None when the page is blank, no
        contact is found, the contact is filtered out, or no phone is present.
    '''
    if not req.strip():
        return None

    html = lxml.etree.HTML(req)
    contact = html.xpath('//ul[@class=\"con-txt\"]//label[contains(text(),"联系人:")]/following-sibling::a/text()')
    if not contact:
        contact = html.xpath('//ul[@class=\"con-txt\"]//label[contains(text(),"联系人:")]/ancestor::li/text()')
    if not contact:
        return None

    try:
        # Filter out women. guess() returns e.g. ('male', 0.9836229687547046):
        # the predicted gender and how strongly the name leans that way.
        guess = ngender.guess(contact[0])
        keep = guess[0] == 'male' or (guess[0] == 'female' and guess[1] < 0.7)
    except Exception:
        # The gender could not be determined, so the contact is kept.
        keep = True
    if not keep:
        return None

    # TODO: xpath decodes this special encoding and it cannot be turned back,
    # so the raw obfuscated number is taken from the page text with a regex.
    phone = search("(?s)<span class='secret'>(.*?)</span>", req)
    if not phone:
        return None

    # Decode the number, then append region and company name.
    contact.extend([_decode_secret_phone(phone.group(1)) + '\t', x[2], x[1]])
    return contact
def test_name(self):
    """Check the predicted gender label for a handful of fixed names."""
    expectations = (
        ("李胜男", 'female'),
        ("李胜", 'male'),
        ("李男", 'male'),
        ("李招弟", 'female'),
        ("李招", 'male'),
        ("李弟", 'male'),
    )
    for name, expected in expectations:
        self.assertEqual(guess(name)[0], expected)
def get_subpage_info(url_pool):
    """Scrape mentor entries from every department sub-page.

    Each page is first searched for ``name(title)`` entries. If none are
    found, a second pattern that yields bare names is tried and the title is
    recorded as 'unknown'.

    :param url_pool: iterable of URL suffixes appended to ``url_base``; the
        suffix is also stored as the mentor's department.
    :return: list of dicts with the keys 'name', 'gender' (the raw result of
        ``ngender.guess``), 'title' and 'department'.
    """
    # Compiled once instead of once per page.
    titled_re = re.compile(
        r'<span style="font-family: 宋体">(.*?)</span></span></strong>')
    plain_re = re.compile(
        r'<div align="center" style="height: 30px">(.*?)</div>')

    mentor_list = []
    for sub_url in url_pool:
        url = url_base + sub_url
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # The original never released the connection.
            response.close()

        names = titled_re.findall(html)
        for entry in names:
            # partition() tolerates entries with no, or more than one,
            # opening parenthesis; the original two-value unpack raised there.
            name, paren, title = entry.partition('(')
            title = title.rstrip(')') if paren else 'unknown'
            mentor_list.append({
                'name': name,
                'gender': ngender.guess(name),
                'title': title,
                'department': sub_url,
            })
        # Single-argument form prints the same on Python 2 and 3.
        print(url)
        if names:
            continue

        for name in plain_re.findall(html):
            mentor_list.append({
                'name': name,
                'gender': ngender.guess(name),
                'title': 'unknown',
                'department': sub_url,
            })
    return mentor_list
def execute_command(self, cmd, args, nick, user, channel):
    """Run one bot command and send the answer to ``channel``.

    :param cmd: command name.
    :param args: command argument text; also used as the name for the
        gender guess.
    :param nick: nickname of the requester.
    :param user: user name of the requester; must equal ``nick`` and be
        listed in ``self.admins`` for ``quit`` to be honoured.
    :param channel: target of the reply.
    :return: None when the command was handled, False when it is unknown.
    """
    # DONE:0 Finish function command_string
    # TODO:0 More commands and command interface
    now = strftime("%Y-%m-%d %H:%M:%S")

    # The reply table below is built eagerly, so the guess is needed for
    # every command, not only for "gender".
    try:
        guessed, confidence = ngender.guess(args)
    except Exception:
        gender, confidence = "<invalid name>", 0
    else:
        if guessed == "male":
            gender = "男"
        elif guessed == "female":
            gender = "女"
        else:
            gender = "未知"

    replies = {
        "help": "[ 帮助信息 ] 本bot拥有以下强力技能: say time gender version <-- 主动技能; 整点消息 网页信息 图片信息 Github项目信息 [更多功能开发中...] <-- 被动技能",
        "version": "PyIrcBot | https://github.com/BruceZhang1993/PyIrcBot | Version: %s" % self.version,
        "say": "%s" % (args),
        "time": "%s: 当前时间: %s" % (nick, now),
        "gender": "%s: [ 性别猜测 ] 姓名: %s => 性别: %s; 可信度: %.2f%%" % (nick, args, gender, confidence * 100),
    }

    conn = self.connection
    if cmd in replies:
        conn.privmsg(channel, replies[cmd])
        return None
    if cmd != "quit":
        return False

    if nick in self.admins and user == nick:
        conn.quit("admin %s asked me to quit." % nick)
        sys.exit(0)
    else:
        conn.privmsg(channel, "%s: 就不粗去,喵~" % nick)
    return None
def play(self):
    """Queue every dialogue line with a per-speaker tone, then play the queue.

    A tone is created the first time a speaker is seen and cached in
    ``self.speaker_tones``. Named speakers get a clone of the default male or
    female tone according to ``ngender.guess``; lines without a speaker get a
    clone of the voice-over tone. Playback is invoked with
    ``export_only=True``.
    """
    for dialogue in self.get_dialogues():
        speaker = dialogue.speaker
        if speaker not in self.speaker_tones:
            if not speaker:
                tone = self.voiceover_tone.clone()
                tone.alias = 'VoiceOver'
            else:
                if ngender.guess(speaker)[0] == 'male':
                    tone = self.default_male_tone.clone()
                else:
                    tone = self.default_female_tone.clone()
                tone.alias = speaker
            self.speaker_tones[speaker] = tone
        self.baidu_speech.append_speech(dialogue.line,
                                        self.speaker_tones[speaker])
    self.baidu_speech.play(export_only=True)
def execute_command(self, cmd, args, nick, user, channel):
    """Run one bot command and send the answer to ``channel``.

    :param cmd: command name.
    :param args: command argument text; also used as the name for the
        gender guess.
    :param nick: nickname of the requester.
    :param user: user name of the requester; must equal ``nick`` and be
        listed in ``self.admins`` for ``quit`` to be honoured.
    :param channel: target of the reply.
    :return: None when the command was handled, False when it is unknown.
    """
    # DONE:0 Finish function command_string
    # TODO:0 More commands and command interface
    now = strftime("%Y-%m-%d %H:%M:%S")

    # The reply table below is built eagerly, so the guess is needed for
    # every command, not only for "gender".
    try:
        guessed, confidence = ngender.guess(args)
    except Exception:
        gender, confidence = "<invalid name>", 0
    else:
        if guessed == "male":
            gender = "男"
        elif guessed == "female":
            gender = "女"
        else:
            gender = "未知"

    replies = {
        "help": "[ 帮助信息 ] 本bot拥有以下强力技能: say time gender version <-- 主动技能; 整点消息 网页信息 图片信息 Github项目信息 [更多功能开发中...] <-- 被动技能",
        "version": "PyIrcBot | https://github.com/BruceZhang1993/PyIrcBot | Version: %s" % self.version,
        "say": "%s" % (args),
        "time": "%s: 当前时间: %s" % (nick, now),
        "gender": "%s: [ 性别猜测 ] 姓名: %s => 性别: %s; 可信度: %.2f%%" % (nick, args, gender, confidence * 100),
    }

    conn = self.connection
    if cmd in replies:
        conn.privmsg(channel, replies[cmd])
        return None
    if cmd != "quit":
        return False

    if nick in self.admins and user == nick:
        conn.quit("admin %s asked me to quit." % nick)
        sys.exit(0)
    else:
        conn.privmsg(channel, "%s: 就不粗去,喵~" % nick)
    return None
def select_name(surname,
                gender,
                hour,
                attr,
                name_source,
                wuxing_dict,
                difficulty_dict,
                modal_particles,
                enableScoring,
                easy_mode,
                cutoff_score,
                num_of_matches=5):
    '''
    Select names based on Wuxing attributes and the difficulty of the words'
    pinyin syllables.

    surname: family name prepended to every candidate given name
    gender = M: select words from <chuci>
    gender = F: select words from <shijing>
        (also compared with the upper-cased first letter of the label
        returned by ngender.guess for the full name)
    hour = 0 ~ 23: the exact hour when the baby was born
    attr: list of wuxing attributes
    name_source: the dictionary is used for name picking
    wuxing_dict: wuxing dictionary, input - Chinese word; output - wuxing
    difficulty_dict: pinyin syllable difficulty dictionary,
        input - pinyin syllable; output - difficulty level
        e.g., 'Hao' - Low, 'Zuo' - High, 'Xuan' - Very High
    modal_particles: forwarded unchanged to get_name_from_wuxing
    enableScoring: True - get name score from some online website
    easy_mode: True - turn on easy-to-pronounce mode
    cutoff_score: int - the cutoff value below which the name will not be
        considered
    num_of_matches: stop searching once this many names were accepted

    Returns a list of [hour, full_name, pinyin_syllables, score] lists. With
    scoring enabled the rows are ordered by descending score and the score is
    a str; otherwise the score column is 'N/A'.
    '''
    sur_type = 1 if len(surname) == 1 else 2
    found_names = get_name_from_wuxing(gender, attr, name_source, wuxing_dict,
                                       modal_particles)

    full_names = []
    accepted = set()  # O(1) duplicate check; mirrors full_names
    name_syllables = []
    name_scores = []
    match_count = 0
    count = 0

    # Exhaust the found names in order until enough matches were collected or
    # the search limit is reached.
    while (match_count < num_of_matches and count < SEARCH_LIMIT
           and count < len(found_names)):
        name = found_names[count]
        count += 1
        full_name = surname + name

        # Match gender.
        if gender != ngender.guess(full_name)[0][0].upper():
            continue
        # if name already exists, skip it
        if full_name in accepted:
            continue
        print('picked name: {}'.format(full_name))

        # Computed once and reused for the syllable string below.
        name_vec = lazy_pinyin(name)
        letters = sum(len(syllable) for syllable in name_vec)
        # Difficulty only matters in easy mode, so the lookups are skipped
        # otherwise.
        is_hard = bool(easy_mode) and any(
            syllable in difficulty_dict
            and is_no_easier_than(difficulty_dict[syllable], 'High')
            for syllable in map(capitalize_first_letter, name_vec))
        if letters > 6 or is_hard:
            # if any of the syllables is hard for English speakers, just skip
            print('pinyin is too long or too hard')
            continue

        if enableScoring:
            score = name_score(full_name, sur_type)
            print('score is {0:2d}'.format(score))
            if score < cutoff_score:
                print('Score is below the cutoff value. Continue searching...')
                continue  # skip those whose score is below the cutoff value
            name_scores.append(score)

        full_names.append(full_name)
        accepted.add(full_name)
        name_syllables.append('-'.join(name_vec))
        match_count += 1

    print('no. of matches = {}'.format(match_count))
    if count == SEARCH_LIMIT:
        # if the search limit is hit, show the warning
        print('Search limit {} is hit! Stop searching!'.format(SEARCH_LIMIT))
    print('Searching Done!\nName, Pinyin, Score(Optional)')

    name_tuples = []
    if enableScoring:
        # Highest score first; the stable sort keeps discovery order among
        # equal scores.
        ranking = sorted(range(len(name_scores)),
                         key=name_scores.__getitem__,
                         reverse=True)
        for index in ranking:
            row = [
                hour, full_names[index], name_syllables[index],
                str(name_scores[index])
            ]
            name_tuples.append(row)
            print('{}, {}, {}, {}'.format(*row))
    else:
        # if name scoring is not requested, simply return the name tuples
        for full_name, syllables in zip(full_names, name_syllables):
            name_tuples.append([hour, full_name, syllables, 'N/A'])
            print('{}, {}, {}, N/A'.format(hour, full_name, syllables))
    return name_tuples
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re

import ngender

# Matches every character outside the range U+4E00..U+9FBF. Compiled once
# here instead of on every loop iteration.
lang_re = re.compile(r'[^\u4e00-\u9FBF]', re.S)

names = ['阿宝', '阿彪', '阿城', '阿丑', '阿达']
for name in names:
    # Strip everything the pattern matches before guessing.
    name = re.sub(lang_re, '', name)
    a = ngender.guess(name)
    print(a[0], a[1])
#coding=utf-8
import ngender

# Guess the gender for a sample name and show the raw result.
r = ngender.guess('汪鹏')
# The single-argument call form prints the same on Python 2 and Python 3.
print(r)
def GuessSex(name):
    """Return the first element of ``ngender.guess(name)``, the gender label."""
    return ngender.guess(name)[0]
result = False
correct = 0
wrong = 0
gender_quiz = str()

# welcome speech
print(
    "Welcome to easy reading! Please let me know who you are and which level you are now!"
)

# get user name
name = input("\nPlease enter your name here: ")

# Chinese Name only. Every character is checked: comparing the whole string
# at once is lexicographic, so it let mixed names such as "张abc" through to
# ngender.
if name and all('\u4e00' <= ch <= '\u9fa0' for ch in name):
    gender = 'boy' if ngender.guess(name)[0] == 'male' else 'girl'

    # Keep asking until the user answers Y or N (case-insensitive).
    flag = False
    while not flag:
        question = "Are you a " + gender + " ?(Y/N)"
        gender_confirmation = input(question)
        if gender_confirmation.lower() in ('y', 'n'):
            flag = True
        else:
            print('please re-enter Y / N, thanks')
or (data_daochu_deleteuseless[j].Organ.find('国家') != -1)): Sheet_pipei.write(count + 1, 25, '3') else: for items in shuangyiliu: if (data_daochu_deleteuseless[j].Organ.find(items) != -1): flag_shuangyiliu = 1 if (flag_shuangyiliu == 1): Sheet_pipei.write(count + 1, 25, '4') else: Sheet_pipei.write(count + 1, 25, '5') if (check_contain_chinese( data_daochu_deleteuseless[j].FirstDuty)): gender_this = ngender.guess( data_daochu_deleteuseless[j].FirstDuty) if (gender_this[0] == 'male'): Sheet_pipei.write(count + 1, 20, '男') Sheet_pipei.write(count + 1, 21, gender_this[1]) Sheet_pipei.write(count + 1, 22, '1') elif (gender_this[0] == 'female'): Sheet_pipei.write(count + 1, 20, '女') Sheet_pipei.write(count + 1, 21, gender_this[1]) Sheet_pipei.write(count + 1, 22, '0') Author_temp = data_beiyin[i].Author.rstrip(';') count_fen = 0 if (Author_temp.find('课题组') != -1): count_fen = 10 else:
import time

import ngender

# Ask for a name, then print the raw ngender guess for it.
name = input("请输入您的名字")
# time.sleep(5)
guess_result = ngender.guess(name)
print(guess_result)
# NOTE(review): groupname is concatenated straight into the SQL text. If it
# can come from untrusted input, this should become a parameterized query;
# the placeholder style depends on the DB driver, which is not visible here.
query = ("select id, Fname, Lname, IsChinese from telegram_user" +
         " where Gname= '" + groupname + "'")
cursor.execute(query)

# UPDATE statements are collected here as strings, one per processed user.
runquery = []

# Fetch at the END of each iteration. The original fetched again at the top of
# the loop, so the first row was never processed and the loop always finished
# by raising on the final None.
row = cursor.fetchone()
while row is not None:
    try:
        print(row)
        gender = ""
        # IsChinese being NULL counts as Chinese, as does any non-zero value.
        if row[3] is None or int(row[3]) != 0:
            fullname = str(row[1]) + str(row[2])
            result = ngender.guess(fullname)
            gender = result[0]
        else:
            result = d.get_gender(row[1])
            print(result)
            gender = result
        # 'female' contains the substring 'male', so it has to be tested
        # first; the original order labelled every female result as male.
        if gender.find('female') != -1:
            gender = 'female'
        elif gender.find('male') != -1:
            gender = 'male'
        runquery.append("update telegram_user set Gender = '" + str(gender) +
                        "' where id = " + str(row[0]))
    except Exception as e:
        # Best effort: report the failing row and carry on with the next one.
        print("encounter an error, the error msg is the folloing:")
        print(e)
    row = cursor.fetchone()
import ngender

# Print the raw ngender guess for each sample name, in order.
for sample in ('杜昌源', '葛敬哲'):
    print(ngender.guess(sample))