def resolveGender(self, name, country):
    '''Resolve the gender for a name by trying progressively looser lookups.

    The name is first transliterated with unidecode() when
    only_cyrillic_chars() or only_greek_chars() accepts it. The following
    lookups are then attempted in order, and the first one that returns a
    non-None result decides the outcome:

    1. tryCrossCountry() on the first name extracted from the name,
    2. tryUnidecoded() on the full name,
    3. tryRemovingFirstAndLastLetters() on the full name,
    4. tryRemovingFirstAndLastLetters() again after removing all digits.

    Args:
        name: The name (or username) string to resolve.
        country: Accepted for interface compatibility; this implementation
            does not use it.

    Returns:
        The result of the first lookup that succeeds, or None when every
        lookup fails.
    '''
    # Check if name is written in Cyrillic or Greek script, and transliterate
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    firstName = extractFirstName(name, 'direct')

    gender = self.tryCrossCountry(firstName)
    if gender is not None:
        return gender

    gender = self.tryUnidecoded(name)
    if gender is not None:
        return gender

    gender = self.tryRemovingFirstAndLastLetters(name)
    if gender is not None:
        return gender

    # Last resort: drop digits (e.g. from usernames) and retry the
    # letter-trimming lookup. The pattern is a raw string because "\d" in
    # a plain literal is an invalid escape sequence.
    name = re.sub(r"\d+", "", name)
    gender = self.tryRemovingFirstAndLastLetters(name)
    if gender is not None:
        return gender

    return None
def initialCheckName(self, name):
    '''Look for a gender-specific word in the first two tokens of a name.

    The name is transliterated with unidecode() when only_cyrillic_chars()
    or only_greek_chars() accepts it. The first whitespace-separated token
    is then checked against self.maleWords and self.femaleWords, followed
    by the second token if there is one.

    Args:
        name: The name string to inspect.

    Returns:
        A (gender, confidence) tuple: ('male', 1) or ('female', 1) when a
        gender-specific word is found, and (None, 0) otherwise. An empty
        or whitespace-only name also yields (None, 0).
    '''
    # Check if name is written in Cyrillic or Greek script, and transliterate
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    # Split once and inspect at most the first two tokens, in order. The
    # slice is empty for an empty name, so indexing can no longer raise
    # IndexError.
    for token in name.split()[:2]:
        if token in self.maleWords:
            return ('male', 1)
        if token in self.femaleWords:
            return ('female', 1)

    return (None, 0)
def initialCheckName(self, name):
    '''Check the first two tokens of a name for gender-specific words.

    Names accepted by only_cyrillic_chars() or only_greek_chars() are
    transliterated with unidecode() first. The first token is tested
    against self.maleWords and self.femaleWords; if neither matches, the
    second token (when present) is tested the same way.

    Args:
        name: The name string to inspect.

    Returns:
        ('male', 1) or ('female', 1) when a gender-specific word is found
        in one of the first two tokens, otherwise (None, 0). A name with
        no tokens (empty or whitespace-only) returns (None, 0).
    '''
    # Check if name is written in Cyrillic or Greek script, and transliterate
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    # A single split serves both checks. Slicing rather than indexing
    # keeps an empty name from raising IndexError.
    leadingTokens = name.split()[:2]
    for token in leadingTokens:
        if token in self.maleWords:
            return ('male', 1)
        if token in self.femaleWords:
            return ('female', 1)

    return (None, 0)
def resolveGender(self, name, country):
    '''Resolve the gender of a name, optionally using a country.

    Lookups run from most to least specific, and the first one that
    returns a non-None result decides the outcome:

    1. A gender-specific word (self.maleWords / self.femaleWords) as the
       first token of the name.
    2. When country is not None: surname-suffix lookup, first-name
       lookup, reversed name order for countries in self.invOrder,
       username heuristics for single-token names, leet decoding, and the
       unidecoded name.
    3. Across all countries via resolveFirstNameOverall(): the first
       name, the unidecoded name, and the username heuristics.

    Args:
        name: Full name or username to resolve.
        country: Country used for the country-specific lookups, or None
            to skip them.

    Returns:
        The value produced by the first successful lookup, or None when
        the name is empty, nothing matches, or the deciding first-name
        lookup reports 'blacklist'.
    '''
    # Check if name is written in Cyrillic or Greek script, and transliterate
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    # Nothing to resolve in an empty or whitespace-only string.
    tokens = name.split()
    if not tokens:
        return None

    # Initial check for gender-specific words at the beginning of the name
    leadingWord = tokens[0]
    if leadingWord in self.maleWords:
        return 'male'
    if leadingWord in self.femaleWords:
        return 'female'

    # Extract first name from name string
    firstName = extractFirstName(name, 'direct')
    isSingleToken = len(tokens) == 1

    if country is not None:
        # Start with suffixes. Works well for Russians (can determine
        # gender based on surname suffix).
        if country in self.suffixes:
            gender = self.suffixLookup(name, country)
            if gender is not None:
                return gender

        # If still no luck, extract first name and try to resolve
        gender = self.resolveFirstName(firstName, country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Try to inverse if no luck.
        # Hungarians use reversed first/last names order.
        if country in self.invOrder:
            gender = self.suffixLookup(inverseNameParts(name), country)
            if gender is not None:
                return gender

            gender = self.resolveFirstName(
                extractFirstName(name, 'inverse'), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # Starting to get desperate by now.
        # Assume name is in fact username, and try different tricks:
        if isSingleToken:
            # - Try the Dutch tricks: look up every prefix that ends just
            #   before a 'v', preferring a non-unisex result.
            if country in ('Belgium', 'The Netherlands', 'South Africa'):
                bestMatch = []
                for match in re.finditer('v', name):
                    gender = self.resolveFirstName(
                        name[:match.start()], country, True)
                    if gender is not None and gender != 'blacklist':
                        bestMatch.append(gender)
                gender = next((g for g in bestMatch if g != 'unisex'), None)
                if gender is not None:
                    return gender
                if 'unisex' in bestMatch:
                    return 'unisex'

            # - Try to guess first name from: bogdanv, vbogdan
            # bogdanv
            gender = self.resolveFirstName(name[:-1].lower(), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender
            # vbogdan
            gender = self.resolveFirstName(name[1:].lower(), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # I can't believe I'm trying leet
        # NOTE(review): this step and the next are assumed to apply to
        # multi-token names as well (they go through extractFirstName) --
        # confirm they are not meant to be limited to single-token names.
        nameL = leet2eng(name)
        gender = self.resolveFirstName(
            extractFirstName(nameL, 'direct'), country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Try also the unidecoded version
        dname = unidecode(name)
        gender = self.resolveFirstName(
            extractFirstName(dname, 'direct'), country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

    # If everything failed, try cross-country
    gender = self.resolveFirstNameOverall(firstName, True)
    if gender is not None:
        return None if gender == 'blacklist' else gender

    # Try also unidecoded version
    dname = unidecode(name)
    gender = self.resolveFirstNameOverall(
        extractFirstName(dname, 'direct'), True)
    if gender is not None:
        return None if gender == 'blacklist' else gender

    if isSingleToken:
        # - Try to guess first name from: bogdanv, vbogdan
        # bogdanv
        gender = self.resolveFirstNameOverall(name[:-1].lower(), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender
        # vbogdan
        gender = self.resolveFirstNameOverall(name[1:].lower(), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

    return None
def resolveGender(self, name, country):
    '''Resolve the gender of a name, optionally using a country.

    Lookups run from most to least specific, and the first one that
    returns a non-None result decides the outcome:

    1. A gender-specific word (self.maleWords / self.femaleWords) as the
       first token of the name.
    2. When country is not None: surname-suffix lookup, first-name
       lookup, reversed name order for countries in self.invOrder,
       username heuristics for single-token names, leet decoding, and the
       unidecoded name.
    3. Across all countries via resolveFirstNameOverall(): the first
       name, the unidecoded name, and the username heuristics.

    Args:
        name: Full name or username to resolve.
        country: Country used for the country-specific lookups, or None
            to skip them.

    Returns:
        The value produced by the first successful lookup, or None when
        the name has no tokens, nothing matches, or the deciding
        first-name lookup reports 'blacklist'.
    '''
    # Check if name is written in Cyrillic or Greek script, and transliterate
    if only_cyrillic_chars(name) or only_greek_chars(name):
        name = unidecode(name)

    # Guard against empty or whitespace-only input: indexing the first
    # token of such a name would raise IndexError.
    nameParts = name.split()
    if not nameParts:
        return None

    # Initial check for gender-specific words at the beginning of the name
    if nameParts[0] in self.maleWords:
        return 'male'
    if nameParts[0] in self.femaleWords:
        return 'female'

    # Extract first name from name string
    firstName = extractFirstName(name, 'direct')
    looksLikeUsername = len(nameParts) == 1

    if country is not None:
        # Start with suffixes. Works well for Russians (can determine
        # gender based on surname suffix).
        if country in self.suffixes:
            gender = self.suffixLookup(name, country)
            if gender is not None:
                return gender

        # If still no luck, extract first name and try to resolve
        gender = self.resolveFirstName(firstName, country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Try to inverse if no luck.
        # Hungarians use reversed first/last names order.
        if country in self.invOrder:
            gender = self.suffixLookup(inverseNameParts(name), country)
            if gender is not None:
                return gender

            gender = self.resolveFirstName(
                extractFirstName(name, 'inverse'), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # Starting to get desperate by now.
        # Assume name is in fact username, and try different tricks:
        if looksLikeUsername:
            # - Try the Dutch tricks: look up every prefix that ends just
            #   before a 'v', preferring a non-unisex result.
            if country in ('Belgium', 'The Netherlands', 'South Africa'):
                bestMatch = []
                for match in re.finditer('v', name):
                    gender = self.resolveFirstName(
                        name[:match.start()], country, True)
                    if gender is not None and gender != 'blacklist':
                        bestMatch.append(gender)
                gender = next((g for g in bestMatch if g != 'unisex'), None)
                if gender is not None:
                    return gender
                if 'unisex' in bestMatch:
                    return 'unisex'

            # - Try to guess first name from: bogdanv, vbogdan
            # bogdanv
            gender = self.resolveFirstName(name[:-1].lower(), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender
            # vbogdan
            gender = self.resolveFirstName(name[1:].lower(), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # I can't believe I'm trying leet
        # NOTE(review): this step and the next are assumed to apply to
        # multi-token names as well (they go through extractFirstName) --
        # confirm they are not meant to be limited to single-token names.
        nameL = leet2eng(name)
        gender = self.resolveFirstName(
            extractFirstName(nameL, 'direct'), country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Try also the unidecoded version
        dname = unidecode(name)
        gender = self.resolveFirstName(
            extractFirstName(dname, 'direct'), country, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

    # If everything failed, try cross-country
    gender = self.resolveFirstNameOverall(firstName, True)
    if gender is not None:
        return None if gender == 'blacklist' else gender

    # Try also unidecoded version
    dname = unidecode(name)
    gender = self.resolveFirstNameOverall(
        extractFirstName(dname, 'direct'), True)
    if gender is not None:
        return None if gender == 'blacklist' else gender

    if looksLikeUsername:
        # - Try to guess first name from: bogdanv, vbogdan
        # bogdanv
        gender = self.resolveFirstNameOverall(name[:-1].lower(), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender
        # vbogdan
        gender = self.resolveFirstNameOverall(name[1:].lower(), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

    return None