Esempio n. 1
0
def filter_name(surname, gender, attr):
    """
    Print five randomly drawn candidate names that pass every filter.

    A candidate is accepted when the guessed gender of the full name matches
    ``gender``, the given name is accepted by ``match_name_word`` and
    ``name_score`` returns a truthy value for the full name.

    :param surname: surname of name.
    :param gender: expected gender initial; compared with the upper-cased
        first letter of the label returned by ``ngender.guess``.
    :param attr: attribute list of name, passed to ``get_wuxing_name``.
    :return: None
    """
    sur_type = 1 if len(surname) == 1 else 2
    db_obj = DBOP()
    name_tuple = db_obj.get_wuxing_name(attr)  # Get all match words
    if not name_tuple:
        # Nothing to draw from; randint(0, -1) would raise ValueError.
        return
    match_count = 0
    # NOTE(review): the loop only ends after five accepted names, so it keeps
    # drawing forever if fewer than five candidates can pass the filters.
    while match_count < 5:
        # randint is inclusive on both ends, hence the "- 1".
        name_info = name_tuple[randint(0, len(name_tuple) - 1)]
        name_id, name = name_info[0], name_info[1]
        full_name = surname + name
        # Match gender and general name word.
        if gender != ngender.guess(
                full_name)[0][0].upper() or not db_obj.match_name_word(name):
            continue
        if not name_score(full_name, sur_type):
            continue
        print("[-] 候选名字:%s" % full_name)
        match_count += 1
        name_source = db_obj.get_name_source(name_id)
        print("[-] 名字出处:")
        print(name_source[2])
        print(name_source[1] + '(' + name_source[0] + ')')
        print(name_source[3])
        print('\n')
Esempio n. 2
0
def main(xml):
    """
    Build the reply text for an incoming message.

    The text of the ``Content`` element is scanned for a keyword; the part of
    the message that follows the keyword (plus one extra character) is handed
    to the matching handler. Messages without a keyword go to ``autoReply``.

    :param xml: element exposing ``find("Content").text``.
    :return: the reply produced by the selected handler.
    """
    content = xml.find("Content").text

    if '垃圾' in content:
        payload = content[content.find('垃圾') + 3:]
        print(payload)
        return whatisit(payload)

    if '猜性别' in content:
        person = content[content.find('猜性别') + 4:]
        if ngender.guess(person)[0] == 'male':
            verdict = "一个帅气的男人!! "
        else:
            verdict = "一个漂亮的仙女~~~"
        reply = "(仅供娱乐) 我猜" + person + "是" + verdict
        print(reply)
        return reply

    if "拼音" in content:
        payload = content[content.find('拼音') + 3:]
        print(payload)
        return pingyin(payload)

    if "翻译" in content:
        payload = content[content.find('翻译') + 3:]
        print(payload)
        return fanyi(payload)

    return autoReply(content)
Esempio n. 3
0
def _decode_secret_phone(req):
    """Return the decoded phone number found in ``req``, or None if absent.

    The number is stored in a ``<span class='secret'>`` element as a run of
    ``&#x...;`` entities. Each digit is recovered as the entity's code point
    minus the first entity's code point, plus one.
    """
    phone = search("(?s)<span class='secret'>(.*?)</span>", req)
    if not phone:
        return None
    codes = [int(sub('&#x', '0x', entity), 16)
             for entity in phone.group(1).split(';')[:-1]]
    return ''.join([str(code - codes[0] + 1) for code in codes])


def get_name_cell_phone(req, x):
    '''
    Extract the contact name and decoded mobile number from a detail page.

    Contacts that ngender classifies as female with a probability of at
    least 0.7 are skipped. Names for which ngender raises are kept.

    :param req: HTML page as a str
    :param x: directory entry; ``x[2]`` and ``x[1]`` are appended after the
        phone number (region and company name according to the original notes)
    :return: the list of contact-name strings extended with
        ``[phone + '\\t', x[2], x[1]]``, or None when the page is blank, no
        contact is found, the contact is filtered out or no phone is present
    '''
    if not req.strip():
        return None

    html = lxml.etree.HTML(req)
    list1 = html.xpath('//ul[@class=\"con-txt\"]//label[contains(text(),"联系人:")]/following-sibling::a/text()')
    if not list1:
        list1 = html.xpath('//ul[@class=\"con-txt\"]//label[contains(text(),"联系人:")]/ancestor::li/text()')
    # TODO: xpath decodes the obfuscated entities and they cannot be
    # recovered afterwards, so the phone is taken from the raw text by regex.
    if not list1:
        return None

    try:
        # guess returns e.g. ('male', 0.98): predicted gender and probability.
        unknown = ngender.guess(list1[0])
    except Exception:
        # ngender raises for names it cannot handle (e.g. non-Chinese);
        # such contacts are kept rather than filtered.
        keep = True
    else:
        keep = unknown[0] == 'male' or (unknown[0] == 'female' and unknown[1] < 0.7)
    if not keep:
        return None

    number = _decode_secret_phone(req)
    if number is None:
        return None
    list1.extend([number + '\t', x[2], x[1]])
    return list1
Esempio n. 4
0
 def test_name(self):
     """Check the gender label guessed for a fixed set of sample names."""
     expectations = (
         ("李胜男", 'female'),
         ("李胜", 'male'),
         ("李男", 'male'),
         ("李招弟", 'female'),
         ("李招", 'male'),
         ("李弟", 'male'),
     )
     for sample, expected in expectations:
         self.assertEqual(guess(sample)[0], expected)
Esempio n. 5
0
def get_subpage_info(url_pool):
    """
    Scrape mentor entries from every department sub-page.

    Each page is first searched for "name(title)" entries. When a page has
    none, it is searched for bare names instead and the title is recorded
    as 'unknown'.

    :param url_pool: iterable of sub-URLs, each appended to ``url_base``.
    :return: list of dicts with the keys 'name', 'gender' (the raw result of
        ``ngender.guess``), 'title' and 'department' (the sub-URL).
    """
    # Compile once instead of once per page.
    titled_pattern = re.compile(
        r'<span style="font-family: 宋体">(.*?)</span></span></strong>')
    plain_pattern = re.compile(
        r'<div align="center" style="height: 30px">(.*?)</div>')

    mentor_list = []
    for sub_url in url_pool:
        url = url_base + sub_url
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()

        names = titled_pattern.findall(html)
        for entry in names:
            # partition never raises: an entry without '(' yields an empty
            # title, and only the first '(' splits name from title.
            name, _, title = entry.partition('(')
            title = title.rstrip(')')
            mentor_list.append({
                'name': name,
                'gender': ngender.guess(name),
                'title': title,
                'department': sub_url,
            })
        print(url)
        if names:
            continue

        for name in plain_pattern.findall(html):
            mentor_list.append({
                'name': name,
                'gender': ngender.guess(name),
                'title': 'unknown',
                'department': sub_url,
            })
    return mentor_list
Esempio n. 6
0
    def execute_command(self, cmd, args, nick, user, channel):
        """
        Run a bot command and send the answer to ``channel``.

        Returns None when the command was handled and False when ``cmd`` is
        not a known command.
        """
        # DONE:0 Finish function command_string
        # TODO:0 More commands and command interface
        now = strftime("%Y-%m-%d %H:%M:%S")

        # The gender guess is computed for every command because the reply
        # table below is built eagerly.
        try:
            (label, certainty) = ngender.guess(args)
            label = "男" if label == "male" else (
                "女" if label == "female" else "未知")
        except Exception:
            label, certainty = "<invalid name>", 0

        help_text = "[ 帮助信息 ] 本bot拥有以下强力技能: say time gender version <-- 主动技能; 整点消息 网页信息 图片信息 Github项目信息 [更多功能开发中...] <-- 被动技能"
        version_text = "PyIrcBot | https://github.com/BruceZhang1993/PyIrcBot | Version: %s" % self.version
        say_text = "%s" % (args)
        time_text = "%s: 当前时间: %s" % (nick, now)
        gender_text = "%s: [ 性别猜测 ] 姓名: %s => 性别: %s; 可信度: %.2f%%" % (
            nick, args, label, certainty * 100)
        replies = {
            "help": help_text,
            "version": version_text,
            "say": say_text,
            "time": time_text,
            "gender": gender_text,
        }

        conn = self.connection
        if cmd in replies:
            conn.privmsg(channel, replies[cmd])
            return None

        if cmd != "quit":
            # Disabled commands kept for reference:
            # elif cmd == "fast-lqy":
            #     c.privmsg(self.channel, ".LQYMGTF")
            # elif cmd == "send2qq":
            #     requests.get(
            #         "http://localhost:3200/send?type=group&to=Test&msg=" + args)
            #     c.privmsg(self.channel, "%s: 消息已同步至QQ群" % nick)
            return False

        # Only an admin whose user name equals the nick may stop the bot.
        if nick in self.admins and user == nick:
            conn.quit("admin %s asked me to quit." % nick)
            sys.exit(0)
        else:
            conn.privmsg(channel, "%s: 就不粗去,喵~" % nick)
        return None
Esempio n. 7
0
 def play(self):
     """
     Queue every dialogue line with a per-speaker tone, then export.

     A tone is created the first time a speaker is seen and cached in
     ``self.speaker_tones``. Named speakers get a clone of the default male
     or female tone depending on the guessed gender of the name; lines
     without a speaker use a clone of the voice-over tone.
     """
     for dialogue in self.get_dialogues():
         speaker = dialogue.speaker
         if speaker not in self.speaker_tones:
             if not speaker:
                 tone = self.voiceover_tone.clone()
                 alias = 'VoiceOver'
             else:
                 is_male = ngender.guess(speaker)[0] == 'male'
                 if is_male:
                     tone = self.default_male_tone.clone()
                 else:
                     tone = self.default_female_tone.clone()
                 alias = speaker
             tone.alias = alias
             self.speaker_tones[speaker] = tone
         self.baidu_speech.append_speech(
             dialogue.line, self.speaker_tones[speaker])
     self.baidu_speech.play(export_only=True)
Esempio n. 8
0
    def execute_command(self, cmd, args, nick, user, channel):
        """
        Dispatch one bot command and reply on ``channel``.

        Simple commands are answered from a pre-built message table. "quit"
        stops the bot when requested by an admin. Any other command yields
        False; handled commands yield None.
        """
        # DONE:0 Finish function command_string
        # TODO:0 More commands and command interface
        timestamp = strftime("%Y-%m-%d %H:%M:%S")

        try:
            (guessed, confidence) = ngender.guess(args)
            if guessed == "male":
                gender_label = "男"
            elif guessed == "female":
                gender_label = "女"
            else:
                gender_label = "未知"
        except Exception:
            # Guessing failed for this argument; report a placeholder.
            gender_label = "<invalid name>"
            confidence = 0

        messages = dict([
            ("help", "[ 帮助信息 ] 本bot拥有以下强力技能: say time gender version <-- 主动技能; 整点消息 网页信息 图片信息 Github项目信息 [更多功能开发中...] <-- 被动技能"),
            ("version", "PyIrcBot | https://github.com/BruceZhang1993/PyIrcBot | Version: %s" % self.version),
            ("say", "%s" % (args)),
            ("time", "%s: 当前时间: %s" % (nick, timestamp)),
            ("gender", "%s: [ 性别猜测 ] 姓名: %s => 性别: %s; 可信度: %.2f%%" % (nick, args, gender_label, confidence * 100)),
        ])

        link = self.connection
        if cmd in messages:
            link.privmsg(channel, messages[cmd])
        elif cmd == "quit":
            may_quit = nick in self.admins and user == nick
            if may_quit:
                link.quit("admin %s asked me to quit." % nick)
                sys.exit(0)
            else:
                link.privmsg(channel, "%s: 就不粗去,喵~" % nick)
        # Disabled commands kept for reference:
        # elif cmd == "fast-lqy":
        #     c.privmsg(self.channel, ".LQYMGTF")
        # elif cmd == "send2qq":
        #     requests.get(
        #         "http://localhost:3200/send?type=group&to=Test&msg=" + args)
        #     c.privmsg(self.channel, "%s: 消息已同步至QQ群" % nick)
        else:
            return False
Esempio n. 9
0
def select_name(surname,
                gender,
                hour,
                attr,
                name_source,
                wuxing_dict,
                difficulty_dict,
                modal_particles,
                enableScoring,
                easy_mode,
                cutoff_score,
                num_of_matches=5):
    '''
    Pick given names that match the requested gender and are easy to spell.

    Candidates come from ``get_name_from_wuxing`` and are examined in order
    until ``num_of_matches`` names are accepted, ``SEARCH_LIMIT`` candidates
    have been examined, or the candidates run out.

    surname: family name; prepended to every candidate
    gender: expected upper-case initial of the label guessed by ngender
            (original notes: M selects words from <chuci>, F from <shijing>)
    hour: birth hour (0 ~ 23 per the original notes); copied into each row
    attr: list of wuxing attributes
    name_source: dictionary used for name picking
    wuxing_dict: wuxing dictionary, Chinese word -> wuxing
    difficulty_dict: pinyin syllable (first letter capitalised) -> difficulty
                     level, e.g. 'Hao' - Low, 'Zuo' - High, 'Xuan' - Very High
    modal_particles: forwarded to ``get_name_from_wuxing``
    enableScoring: True - score each name with ``name_score``
    easy_mode: True - reject syllables rated 'High' or harder
    cutoff_score: names scoring below this value are rejected
    num_of_matches: number of names to accept before stopping

    Returns a list of [hour, full name, hyphen-joined pinyin, score] rows,
    where score is a string ('N/A' when scoring is off). With scoring on,
    rows are ordered by descending score.
    '''
    sur_type = 2 if len(surname) != 1 else 1
    accepted_names = []
    accepted_pinyin = []
    accepted_scores = []
    candidates = get_name_from_wuxing(gender, attr, name_source, wuxing_dict,
                                      modal_particles)

    examined = 0
    while (len(accepted_names) < num_of_matches and examined < SEARCH_LIMIT
           and examined < len(candidates)):
        # Walk the candidates in order (an earlier version drew them at
        # random: found_names[random.randint(0, len(found_names) - 1)]).
        given_name = candidates[examined]
        examined += 1
        full_name = surname + given_name

        # Reject on gender mismatch, then on duplicates.
        if gender != ngender.guess(full_name)[0][0].upper():
            continue
        if full_name in accepted_names:
            continue
        print('picked name: {}'.format(full_name))

        # Total pinyin length and whether any syllable is rated too hard.
        total_letters = 0
        too_hard = False
        for syllable in lazy_pinyin(given_name):
            total_letters += len(syllable)
            key = capitalize_first_letter(syllable)
            if (key in difficulty_dict
                    and is_no_easier_than(difficulty_dict[key], 'High')
                    and easy_mode):
                too_hard = True
        if too_hard or total_letters > 6:
            print('pinyin is too long or too hard')
            continue

        if enableScoring:
            score = name_score(full_name, sur_type)
            print('score is {0:2d}'.format(score))
            if score < cutoff_score:
                print('Score is below the cutoff value. Continue searching...')
                continue
            accepted_scores.append(score)

        accepted_names.append(full_name)
        accepted_pinyin.append('-'.join(lazy_pinyin(given_name)))
        print('no. of matches = {}'.format(len(accepted_names)))

    if examined == SEARCH_LIMIT:
        print('Search limit {} is hit! Stop searching!'.format(SEARCH_LIMIT))

    print('Searching Done!\nName, Pinyin, Score(Optional)')

    if enableScoring:
        # Stable sort: names with equal scores keep their acceptance order.
        ranked = sorted(
            zip(accepted_names, accepted_pinyin, accepted_scores),
            key=lambda entry: entry[2],
            reverse=True)
        # Disabled CSV export kept for reference:
        # with open('./name/babyname_{0}_{1}.csv'.format(surname, hour), 'w') as f:
        #     for index in indices:
        #         f.write(full_names[index] + ', ' + name_syllables[index] + ', ' + str(name_scores[index]))
        #         f.write('\n')
        name_tuples = [[hour, full, pinyin, str(score)]
                       for full, pinyin, score in ranked]
    else:
        name_tuples = [[hour, full, pinyin, 'N/A']
                       for full, pinyin in zip(accepted_names,
                                               accepted_pinyin)]

    for row in name_tuples:
        print('{}, {}, {}, {}'.format(row[0], row[1], row[2], row[3]))

    return name_tuples
Esempio n. 10
0
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import re

import ngender

# Matches every character outside the \u4e00-\u9FBF range so that it can be
# stripped from a name before guessing. Compiled once, not once per name.
lang_re = re.compile(r'[^\u4e00-\u9FBF]', re.S)

names = ['阿宝', '阿彪', '阿城', '阿丑', '阿达']
for name in names:
    name = lang_re.sub('', name)
    # guess returns a pair; print the gender label and its probability.
    a = ngender.guess(name)
    print(a[0], a[1])
Esempio n. 11
0
#coding=utf-8

import ngender

# Guess the gender of a sample name and print the raw result.
r = ngender.guess('汪鹏')
# Parenthesised form prints the same under Python 2 and also runs on Python 3.
print(r)


Esempio n. 12
0
def GuessSex(name):
    """Return the first element (the gender label) of ``ngender.guess(name)``."""
    return ngender.guess(name)[0]
result = False
correct = 0
wrong = 0
gender_quiz = str()

# welcome speech
print(
    "Welcome to easy reading! Please let me know who you are and which level you are now!"
)
# get user name
name = input("\nPlease enter your name here: ")

# Chinese Name only: every character must lie in the accepted range. A plain
# string comparison would be lexicographic and effectively test only the
# first character, letting mixed names such as "李abc" through.
if name and all('\u4e00' <= ch <= '\u9fa0' for ch in name):

    gender = 'boy' if ngender.guess(name)[0] == 'male' else 'girl'

    # Keep asking until the user answers Y or N (case-insensitive).
    flag = False
    while not flag:
        question = "Are you a " + gender + " ?(Y/N)"
        gender_confirmation = input(question)
        if gender_confirmation.lower() in ('y', 'n'):
            flag = True
        else:
            print('please re-enter Y / N, thanks')
                      or
                      (data_daochu_deleteuseless[j].Organ.find('国家') != -1)):
                    Sheet_pipei.write(count + 1, 25, '3')
                else:
                    for items in shuangyiliu:
                        if (data_daochu_deleteuseless[j].Organ.find(items) !=
                                -1):
                            flag_shuangyiliu = 1
                    if (flag_shuangyiliu == 1):
                        Sheet_pipei.write(count + 1, 25, '4')
                    else:
                        Sheet_pipei.write(count + 1, 25, '5')

                if (check_contain_chinese(
                        data_daochu_deleteuseless[j].FirstDuty)):
                    gender_this = ngender.guess(
                        data_daochu_deleteuseless[j].FirstDuty)

                    if (gender_this[0] == 'male'):
                        Sheet_pipei.write(count + 1, 20, '男')
                        Sheet_pipei.write(count + 1, 21, gender_this[1])
                        Sheet_pipei.write(count + 1, 22, '1')
                    elif (gender_this[0] == 'female'):
                        Sheet_pipei.write(count + 1, 20, '女')
                        Sheet_pipei.write(count + 1, 21, gender_this[1])
                        Sheet_pipei.write(count + 1, 22, '0')

                Author_temp = data_beiyin[i].Author.rstrip(';')
                count_fen = 0
                if (Author_temp.find('课题组') != -1):
                    count_fen = 10
                else:
Esempio n. 15
0
import ngender
import time
# Read a name from standard input and print the result of ngender.guess for it.
name = input("请输入您的名字")
# Optional delay, currently disabled:
# time.sleep(5)
print(ngender.guess(name))
Esempio n. 16
0
# NOTE(review): groupname is concatenated straight into the SQL text. If it
# can come from an untrusted source this is an injection risk; switch to a
# parameterized query using the placeholder style of the DB driver in use.
query = ("select id, Fname, Lname, IsChinese from telegram_user" +
         " where Gname= '" + groupname + "'")

cursor.execute(query)

# Collect one UPDATE statement per user row.
runquery = []
row = cursor.fetchone()
while row is not None:
    try:
        print(row)
        gender = ""
        # A missing IsChinese flag is treated as Chinese.
        if row[3] is None or int(row[3]) != 0:
            fullname = str(row[1]) + str(row[2])
            result = ngender.guess(fullname)
            gender = result[0]
        else:
            #name = (str(row[0])).encode("utf-8")
            result = d.get_gender(row[1])
            print(result)
            gender = result
            # Test 'female' first: 'male' is a substring of 'female', so the
            # reverse order would label every female result as 'male'.
            if (gender.find('female') != -1):
                gender = 'female'
            elif (gender.find('male') != -1):
                gender = 'male'
        runquery.append("update telegram_user set Gender = '" + str(gender) +
                        "' where id = " + str(row[0]))
    except Exception as e:
        print("encounter an error, the error msg is the folloing:")
        print(e)
    # Advance only after the current row is handled, so the first row is not
    # skipped and the terminating None is never processed.
    row = cursor.fetchone()
Esempio n. 17
0
import ngender
# Print the ngender guess for each sample name, in order.
for _sample in ('杜昌源', '葛敬哲'):
    print(ngender.guess(_sample))