def resolveGender(self, name, country):
    """Guess the gender behind ``name``, preferring ``country``-specific data.

    Lookups run from most to least specific and the first one that
    produces an answer wins:

    1. a gender-specific word at the start of the name
       (``self.maleWords`` / ``self.femaleWords``);
    2. when ``country`` is given: surname suffixes, the first name, the
       reversed name order, username tricks, leet decoding and the
       unidecoded name, all resolved for that country;
    3. the first-name lookups again, this time across all countries.

    A first-name lookup answering ``'blacklist'`` ends the search with
    ``None``.  Answers from ``self.suffixLookup`` are returned unfiltered.

    :param name: full name or username; a name for which
        ``only_cyrillic_chars`` or ``only_greek_chars`` is true is
        transliterated with ``unidecode`` first.
    :param country: country for the country-specific lookups, or ``None``
        to go straight to the cross-country lookups.
    :return: the gender label that was found, or ``None`` when the name is
        blank, blacklisted or unresolved.
    """
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    # Nothing to work with once surrounding whitespace is ignored.
    if not name.strip():
        return None

    tokens = name.split()
    lead = tokens[0]
    if lead in self.maleWords:
        return 'male'
    if lead in self.femaleWords:
        return 'female'

    given = extractFirstName(name, 'direct')
    looks_like_username = len(tokens) == 1

    def attempts():
        # Lazily yields (answer, screened) pairs in lookup order; nothing
        # past a yield runs unless the previous answer was None.
        # ``screened`` marks first-name lookups, whose 'blacklist' answer
        # has to be reported as None.
        if country is not None:
            # Surname suffixes come first (works well for Russians).
            if country in self.suffixes:
                yield self.suffixLookup(name, country), False

            yield self.resolveFirstName(given, country, True), True

            # Hungarians use reversed first/last name order.
            if country in self.invOrder:
                yield self.suffixLookup(inverseNameParts(name), country), False
                yield self.resolveFirstName(
                    extractFirstName(name, 'inverse'), country, True), True

            # Getting desperate: treat a one-word name as a username.
            if looks_like_username:
                if country in ('Belgium', 'The Netherlands', 'South Africa'):
                    # Dutch trick: try every prefix that ends right before
                    # a 'v' as a first name.
                    hits = []
                    for found in re.finditer('v', name):
                        guess = self.resolveFirstName(
                            name[:found.start()], country, True)
                        if guess is not None and guess != 'blacklist':
                            hits.append(guess)
                    # A decisive answer beats 'unisex'.
                    verdict = next((g for g in hits if g != 'unisex'), None)
                    if verdict is None and 'unisex' in hits:
                        verdict = 'unisex'
                    yield verdict, False

                # bogdanv
                yield self.resolveFirstName(
                    name[:-1].lower(), country, True), True
                # vbogdan
                yield self.resolveFirstName(
                    name[1:].lower(), country, True), True

            # NOTE(review): the nesting of the leet and unidecode steps was
            # reconstructed from a source that had lost its indentation;
            # confirm they belong outside the username branch.
            yield self.resolveFirstName(
                extractFirstName(leet2eng(name), 'direct'), country, True), True
            yield self.resolveFirstName(
                extractFirstName(unidecode(name), 'direct'), country, True), True

        # Everything country-specific failed (or no country): cross-country.
        yield self.resolveFirstNameOverall(given, True), True
        yield self.resolveFirstNameOverall(
            extractFirstName(unidecode(name), 'direct'), True), True
        if looks_like_username:
            # bogdanv
            yield self.resolveFirstNameOverall(name[:-1].lower(), True), True
            # vbogdan
            yield self.resolveFirstNameOverall(name[1:].lower(), True), True

    for answer, screened in attempts():
        if answer is None:
            continue
        if screened and answer == 'blacklist':
            return None
        return answer
    return None
def resolveGender(self, name, country):
    """Resolve the gender for ``name``, using ``country`` data when given.

    The lookups below are tried in order and the first answer wins:
    gender-specific leading word, then (only when ``country`` is not
    ``None``) surname suffix, first name, reversed name order, username
    tricks, leet decoding and the unidecoded name for that country, and
    finally the first-name lookups across all countries.

    A first-name lookup answering ``'blacklist'`` ends the search with
    ``None``.  Answers from ``self.suffixLookup`` are returned as they are.

    :param name: full name or username.  ``None``, empty and
        whitespace-only values resolve to ``None``.
    :param country: country for the country-specific lookups, or ``None``
        to use only the cross-country lookups.
    :return: the gender label that was found, or ``None`` when the name is
        blank, blacklisted or unresolved.
    """
    # Blank input cannot be resolved.  Bail out before touching any helper:
    # without this guard ``name.split()[0]`` raises IndexError.
    if not name or not name.strip():
        return None

    # Names written only in Cyrillic or Greek script are transliterated.
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    words = name.split()
    if not words:
        # Transliteration left nothing but whitespace.
        return None

    # Gender-specific word at the beginning of the name.
    if words[0] in self.maleWords:
        return 'male'
    if words[0] in self.femaleWords:
        return 'female'

    firstName = extractFirstName(name, 'direct')
    isSingleWord = len(words) == 1

    if country is not None:
        # Start with suffixes; works well for Russians (gender can be
        # determined from the surname suffix).
        if country in self.suffixes:
            gender = self.suffixLookup(name, country)
            if gender is not None:
                return gender

        # Still no luck: resolve the extracted first name.
        gender = self.resolveFirstName(firstName, country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Hungarians use reversed first/last name order.
        if country in self.invOrder:
            gender = self.suffixLookup(inverseNameParts(name), country)
            if gender is not None:
                return gender

            gender = self.resolveFirstName(
                extractFirstName(name, 'inverse'), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # Getting desperate: assume the name is in fact a username.
        if isSingleWord:
            # Dutch trick: try every prefix that ends right before a 'v'.
            if country in ('Belgium', 'The Netherlands', 'South Africa'):
                bestMatch = []
                for match in re.finditer('v', name):
                    gender = self.resolveFirstName(
                        name[:match.start()], country, True)
                    if gender is not None and gender != 'blacklist':
                        bestMatch.append(gender)
                # A decisive answer beats 'unisex'.
                gender = next((g for g in bestMatch if g != 'unisex'), None)
                if gender is not None:
                    return gender
                if 'unisex' in bestMatch:
                    return 'unisex'

            # Guess the first name from usernames like bogdanv ...
            gender = self.resolveFirstName(name[:-1].lower(), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

            # ... and vbogdan.
            gender = self.resolveFirstName(name[1:].lower(), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # NOTE(review): the nesting of the leet and unidecode steps was
        # reconstructed from a source that had lost its indentation;
        # confirm they belong outside the username branch.
        # Leet-decoded variant of the name.
        gender = self.resolveFirstName(
            extractFirstName(leet2eng(name), 'direct'), country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Unidecoded variant of the name.
        gender = self.resolveFirstName(
            extractFirstName(unidecode(name), 'direct'), country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

    # Everything country-specific failed (or no country): cross-country.
    gender = self.resolveFirstNameOverall(firstName, True)
    if gender is not None:
        return None if gender == 'blacklist' else gender

    # Unidecoded variant, cross-country.
    gender = self.resolveFirstNameOverall(
        extractFirstName(unidecode(name), 'direct'), True)
    if gender is not None:
        return None if gender == 'blacklist' else gender

    if isSingleWord:
        # bogdanv
        gender = self.resolveFirstNameOverall(name[:-1].lower(), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # vbogdan
        gender = self.resolveFirstNameOverall(name[1:].lower(), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

    return None
def resolveGenderByCountry(self, name, country, script='Latin'):
    """Resolve ``name`` to a ``(gender, conf)`` pair using ``country`` data.

    ``self.initialCheckName`` is consulted first.  When it gives no answer
    and ``country`` is not ``None``, the following are tried in order and
    the first answer wins: surname suffix, first name, reversed name
    order, username tricks, leet decoding and the unidecoded name.

    A first-name lookup answering ``'blacklist'`` ends the search with
    ``(None, 0)``.  Answers from ``self.suffixLookup`` are returned as
    they are.

    :param name: full name or username.
    :param country: country whose data is used; with ``None`` only the
        initial check runs.
    :param script: forwarded unchanged to ``self.resolveFirstName`` and
        ``self.resolveFirstNameOverall``.
    :return: ``(gender, conf)`` as produced by the helper that answered
        (``conf`` is presumably a confidence score -- verify against the
        helpers), ``('unisex', 0)`` when the Dutch trick only finds unisex
        prefixes, or ``(None, 0)`` when nothing matched.
    """
    (gender, conf) = self.initialCheckName(name)
    if gender is not None:
        return (gender, conf)

    firstName = extractFirstName(name, 'direct')

    if country is not None:
        # Start with suffixes; works well for Russians (gender can be
        # determined from the surname suffix).
        if country in self.suffixes:
            (gender, conf) = self.suffixLookup(name, country)
            if gender is not None:
                return (gender, conf)

        # Still no luck: resolve the extracted first name.
        (gender, conf) = self.resolveFirstName(
            firstName, country, True, script)
        if gender is not None:
            return (None, 0) if gender == 'blacklist' else (gender, conf)

        # Hungarians use reversed first/last name order.
        if country in self.invOrder:
            (gender, conf) = self.suffixLookup(
                inverseNameParts(name), country)
            if gender is not None:
                return (gender, conf)

            (gender, conf) = self.resolveFirstName(
                extractFirstName(name, 'inverse'), country, True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

        # Getting desperate: assume the name is in fact a username.
        if len(name.split()) == 1:
            # Dutch trick: try every prefix that ends right before a 'v'.
            if country in ('Belgium', 'The Netherlands', 'South Africa'):
                # Keep each gender together with its own confidence so the
                # value returned belongs to the prefix that actually won,
                # not to whichever prefix happened to be tried last.
                bestMatch = []
                for match in re.finditer('v', name):
                    (gender, conf) = self.resolveFirstName(
                        name[:match.start()], country, True, script)
                    if gender is not None and gender != 'blacklist':
                        bestMatch.append((gender, conf))
                # A decisive answer beats 'unisex'.
                decisive = next(
                    (pair for pair in bestMatch if pair[0] != 'unisex'), None)
                if decisive is not None:
                    return decisive
                if bestMatch:
                    # Only unisex prefixes were found.
                    return ('unisex', 0)

            # bogdan: the whole username, looked up across all countries.
            (gender, conf) = self.resolveFirstNameOverall(
                name.lower(), True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

            # bogdanv
            (gender, conf) = self.resolveFirstName(
                name[:-1].lower(), country, True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

            # vbogdan
            (gender, conf) = self.resolveFirstName(
                name[1:].lower(), country, True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

        # NOTE(review): the nesting of the leet and unidecode steps was
        # reconstructed from a source that had lost its indentation;
        # confirm they belong outside the username branch.
        # Leet-decoded variant of the name.
        (gender, conf) = self.resolveFirstName(
            extractFirstName(leet2eng(name), 'direct'), country, True, script)
        if gender is not None:
            return (None, 0) if gender == 'blacklist' else (gender, conf)

        # Unidecoded variant of the name.
        (gender, conf) = self.resolveFirstName(
            extractFirstName(unidecode(name), 'direct'), country, True, script)
        if gender is not None:
            return (None, 0) if gender == 'blacklist' else (gender, conf)

    return (None, 0)
def resolveGenderByCountry(self, name, country, script='Latin'):
    """Resolve ``name`` to a ``(gender, conf)`` pair using ``country`` data.

    ``self.initialCheckName`` is consulted first.  When it gives no answer
    and ``country`` is not ``None``, the following are tried in order and
    the first answer wins: surname suffix, first name, reversed name
    order, username tricks, leet decoding and the unidecoded name.

    A first-name lookup answering ``'blacklist'`` ends the search with
    ``(None, 0)``.  Answers from ``self.suffixLookup`` are returned as
    they are.

    :param name: full name or username.
    :param country: country whose data is used; with ``None`` only the
        initial check runs.
    :param script: forwarded unchanged to ``self.resolveFirstName`` and
        ``self.resolveFirstNameOverall``.
    :return: ``(gender, conf)`` as produced by the helper that answered
        (``conf`` is presumably a confidence score -- verify against the
        helpers), ``('unisex', 0)`` when the Dutch trick only finds unisex
        prefixes, or ``(None, 0)`` when nothing matched.
    """
    (gender, conf) = self.initialCheckName(name)
    if gender is not None:
        return (gender, conf)

    firstName = extractFirstName(name, 'direct')

    if country is not None:
        # Start with suffixes; works well for Russians (gender can be
        # determined from the surname suffix).
        if country in self.suffixes:
            (gender, conf) = self.suffixLookup(name, country)
            if gender is not None:
                return (gender, conf)

        # Still no luck: resolve the extracted first name.
        (gender, conf) = self.resolveFirstName(
            firstName, country, True, script)
        if gender is not None:
            return (None, 0) if gender == 'blacklist' else (gender, conf)

        # Hungarians use reversed first/last name order.
        if country in self.invOrder:
            (gender, conf) = self.suffixLookup(
                inverseNameParts(name), country)
            if gender is not None:
                return (gender, conf)

            (gender, conf) = self.resolveFirstName(
                extractFirstName(name, 'inverse'), country, True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

        # Getting desperate: assume the name is in fact a username.
        if len(name.split()) == 1:
            # Dutch trick: try every prefix that ends right before a 'v'.
            if country in ('Belgium', 'The Netherlands', 'South Africa'):
                # Keep each gender together with its own confidence so the
                # value returned belongs to the prefix that actually won,
                # not to whichever prefix happened to be tried last.
                bestMatch = []
                for match in re.finditer('v', name):
                    (gender, conf) = self.resolveFirstName(
                        name[:match.start()], country, True, script)
                    if gender is not None and gender != 'blacklist':
                        bestMatch.append((gender, conf))
                # A decisive answer beats 'unisex'.
                decisive = next(
                    (pair for pair in bestMatch if pair[0] != 'unisex'), None)
                if decisive is not None:
                    return decisive
                if bestMatch:
                    # Only unisex prefixes were found.
                    return ('unisex', 0)

            # bogdan: the whole username, looked up across all countries.
            (gender, conf) = self.resolveFirstNameOverall(
                name.lower(), True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

            # bogdanv
            (gender, conf) = self.resolveFirstName(
                name[:-1].lower(), country, True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

            # vbogdan
            (gender, conf) = self.resolveFirstName(
                name[1:].lower(), country, True, script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

        # NOTE(review): the nesting of the leet and unidecode steps was
        # reconstructed from a source that had lost its indentation;
        # confirm they belong outside the username branch.
        # Leet-decoded variant of the name.
        (gender, conf) = self.resolveFirstName(
            extractFirstName(leet2eng(name), 'direct'), country, True, script)
        if gender is not None:
            return (None, 0) if gender == 'blacklist' else (gender, conf)

        # Unidecoded variant of the name.
        (gender, conf) = self.resolveFirstName(
            extractFirstName(unidecode(name), 'direct'), country, True, script)
        if gender is not None:
            return (None, 0) if gender == 'blacklist' else (gender, conf)

    return (None, 0)