def resolveGender(self, name, country):
		"""Resolve the gender for ``name`` through a cascade of lookups.

		The lookups run in order and the first non-None answer is returned:
		cross-country lookup of the extracted first name, the unidecoded
		variant, the "remove first and last letters" heuristic, and finally
		that same heuristic again after digits are stripped from the name.

		Args:
			name: Full name or username to analyse.
			country: Accepted for interface compatibility; this
				implementation does not read it.

		Returns:
			The first non-None result of the lookups, or None when every
			lookup fails.
		"""
		# Names written purely in Cyrillic or Greek script are transliterated.
		if only_cyrillic_chars(name) or only_greek_chars(name):
			name = unidecode(name)

		firstName = extractFirstName(name, 'direct')

		gender = self.tryCrossCountry(firstName)
		if gender is not None:
			return gender

		gender = self.tryUnidecoded(name)
		if gender is not None:
			return gender

		gender = self.tryRemovingFirstAndLastLetters(name)
		if gender is not None:
			return gender

		# Raw string: "\d" inside a plain literal is an invalid escape
		# sequence and triggers a warning on current Python versions.
		name = re.sub(r"\d+", "", name)

		# Last attempt; its result (possibly None) is the final answer.
		return self.tryRemovingFirstAndLastLetters(name)
Beispiel #2
0
    def resolveGender(self, name, country):
        """Resolve the gender for ``name`` through a cascade of lookups.

        The lookups run in order and the first non-None answer is returned:
        cross-country lookup of the extracted first name, the unidecoded
        variant, the "remove first and last letters" heuristic, and finally
        that same heuristic again after digits are stripped from the name.

        Args:
            name: Full name or username to analyse.
            country: Accepted for interface compatibility; this
                implementation does not read it.

        Returns:
            The first non-None result of the lookups, or None when every
            lookup fails.
        """
        # Names written purely in Cyrillic or Greek script are transliterated.
        if only_cyrillic_chars(name) or only_greek_chars(name):
            name = unidecode(name)

        firstName = extractFirstName(name, 'direct')

        gender = self.tryCrossCountry(firstName)
        if gender is not None:
            return gender

        gender = self.tryUnidecoded(name)
        if gender is not None:
            return gender

        gender = self.tryRemovingFirstAndLastLetters(name)
        if gender is not None:
            return gender

        # Raw string: "\d" inside a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        name = re.sub(r"\d+", "", name)

        # Last attempt; its result (possibly None) is the final answer.
        return self.tryRemovingFirstAndLastLetters(name)
Beispiel #3
0
    def resolveGender(self, name, country):
        """Guess the gender behind a full name or username.

        A purely Cyrillic or Greek name is transliterated first.  A cascade
        of lookups then runs in order and the first one that yields an
        answer decides.  Country-specific lookups run only when ``country``
        is not None; country-independent lookups follow.

        Args:
            name: Full name or username.
            country: Country used by the country-specific lookups, or None
                to skip them.

        Returns:
            'male' or 'female' when the first word is a known gendered word;
            otherwise the label produced by the first successful lookup.
            None when the name is blank, when a first-name lookup answers
            'blacklist' (the username-prefix trick merely skips such
            answers), or when nothing matches.
        """
        # Purely Cyrillic or Greek names are transliterated up front.
        if only_cyrillic_chars(name) or only_greek_chars(name):
            name = unidecode(name)

        # A blank string carries no information.
        if not name.strip():
            return None

        tokens = name.split()
        single_token = len(tokens) == 1

        # Gender-specific word at the very beginning of the name.
        leading = tokens[0]
        if leading in self.maleWords:
            return 'male'
        if leading in self.femaleWords:
            return 'female'

        given = extractFirstName(name, 'direct')

        if country is not None:
            # Start with suffixes: works well for Russians, whose surname
            # suffix can determine gender.
            if country in self.suffixes.keys():
                verdict = self.suffixLookup(name, country)
                if verdict is not None:
                    return verdict

            # Plain first-name lookup for this country.
            verdict = self.resolveFirstName(given, country, True)
            if verdict is not None:
                return None if verdict == 'blacklist' else verdict

            # Hungarians use reversed first/last name order.
            if country in self.invOrder:
                verdict = self.suffixLookup(inverseNameParts(name), country)
                if verdict is not None:
                    return verdict

                swapped = extractFirstName(name, 'inverse')
                verdict = self.resolveFirstName(swapped, country, True)
                if verdict is not None:
                    return None if verdict == 'blacklist' else verdict

            # Getting desperate: assume the name is in fact a username.
            if single_token:
                # Dutch trick: try every prefix that ends just before a 'v'.
                if country in ('Belgium', 'The Netherlands', 'South Africa'):
                    candidates = []
                    for hit in re.finditer('v', name):
                        guess = self.resolveFirstName(
                            name[:hit.start()], country, True)
                        if guess is not None and guess != 'blacklist':
                            candidates.append(guess)
                    # A decisive answer beats 'unisex'.
                    for guess in candidates:
                        if guess != 'unisex':
                            return guess
                    if 'unisex' in candidates:
                        return 'unisex'

                # Usernames shaped like "bogdanv": drop the last character.
                verdict = self.resolveFirstName(
                    name[:-1].lower(), country, True)
                if verdict is not None:
                    return None if verdict == 'blacklist' else verdict

                # Usernames shaped like "vbogdan": drop the first character.
                verdict = self.resolveFirstName(
                    name[1:].lower(), country, True)
                if verdict is not None:
                    return None if verdict == 'blacklist' else verdict

            # Leet-speak decoded variant.
            unleeted = extractFirstName(leet2eng(name), 'direct')
            verdict = self.resolveFirstName(unleeted, country, True)
            if verdict is not None:
                return None if verdict == 'blacklist' else verdict

            # Unidecoded variant.
            ascii_given = extractFirstName(unidecode(name), 'direct')
            verdict = self.resolveFirstName(ascii_given, country, True)
            if verdict is not None:
                return None if verdict == 'blacklist' else verdict

        # Country-specific attempts failed (or were skipped): cross-country.
        verdict = self.resolveFirstNameOverall(given, True)
        if verdict is not None:
            return None if verdict == 'blacklist' else verdict

        # Cross-country lookup of the unidecoded variant.
        ascii_given = extractFirstName(unidecode(name), 'direct')
        verdict = self.resolveFirstNameOverall(ascii_given, True)
        if verdict is not None:
            return None if verdict == 'blacklist' else verdict

        if single_token:
            # "bogdanv"
            verdict = self.resolveFirstNameOverall(name[:-1].lower(), True)
            if verdict is not None:
                return None if verdict == 'blacklist' else verdict

            # "vbogdan"
            verdict = self.resolveFirstNameOverall(name[1:].lower(), True)
            if verdict is not None:
                return None if verdict == 'blacklist' else verdict

        return None
Beispiel #4
0
    def resolveGenderByCountry(self, name, country, script='Latin'):
        """Resolve ``name`` to a ``(gender, confidence)`` pair for a country.

        ``self.initialCheckName`` is consulted first; if it yields a gender
        that pair is returned unchanged.  Otherwise, and only when
        ``country`` is not None, a cascade of country-specific lookups runs
        and the first one that yields an answer decides.

        Args:
            name: Full name or username.
            country: Country to resolve against; None skips every lookup
                after the initial check.
            script: Forwarded to the first-name lookups.

        Returns:
            ``(gender, conf)`` from the first successful lookup, or
            ``(None, 0)`` when a first-name lookup answers 'blacklist' or
            nothing matches.  A 'unisex' answer from the username-prefix
            trick is reported with confidence 0.
        """
        (gender, conf) = self.initialCheckName(name)
        if gender is not None:
            return (gender, conf)

        # Extract first name from name string.
        firstName = extractFirstName(name, 'direct')

        if country is not None:
            # Start with suffixes: works well for Russians (can determine
            # gender based on surname suffix).
            if country in self.suffixes.keys():
                (gender, conf) = self.suffixLookup(name, country)
                if gender is not None:
                    return (gender, conf)

            # If still no luck, try to resolve the extracted first name.
            (gender, conf) = self.resolveFirstName(firstName, country, True,
                                                   script)
            if gender is not None:
                return (None, 0) if gender == 'blacklist' else (gender, conf)

            # Try the inverse order: Hungarians use reversed first/last
            # names order.
            if country in self.invOrder:
                (gender, conf) = self.suffixLookup(inverseNameParts(name),
                                                   country)
                if gender is not None:
                    return (gender, conf)

                (gender, conf) = self.resolveFirstName(
                    extractFirstName(name, 'inverse'), country, True, script)
                if gender is not None:
                    if gender == 'blacklist':
                        return (None, 0)
                    return (gender, conf)

            # Getting desperate: assume the name is in fact a username.
            if len(name.split()) == 1:
                # Dutch trick: try every prefix ending just before a 'v'.
                if country in ['Belgium', 'The Netherlands', 'South Africa']:
                    # Keep each confidence with its own gender, so the
                    # returned pair never mixes a gender from one prefix
                    # with the confidence of another.
                    matches = []
                    for hit in re.finditer('v', name):
                        (gender, conf) = self.resolveFirstName(
                            name[:hit.start()], country, True, script)
                        if gender is not None and gender != 'blacklist':
                            matches.append((gender, conf))
                    decisive = next(
                        (pair for pair in matches if pair[0] != 'unisex'),
                        None)
                    if decisive is not None:
                        return decisive
                    if matches:
                        # Only 'unisex' answers were collected.
                        return ('unisex', 0)

                # "bogdan": the whole username, looked up via the
                # cross-country resolver.
                (gender, conf) = self.resolveFirstNameOverall(
                    name.lower(), True, script)
                if gender is not None:
                    if gender == 'blacklist':
                        return (None, 0)
                    return (gender, conf)

                # "bogdanv": drop the last character.
                (gender, conf) = self.resolveFirstName(name[:-1].lower(),
                                                       country, True, script)
                if gender is not None:
                    if gender == 'blacklist':
                        return (None, 0)
                    return (gender, conf)

                # "vbogdan": drop the first character.
                (gender, conf) = self.resolveFirstName(name[1:].lower(),
                                                       country, True, script)
                if gender is not None:
                    if gender == 'blacklist':
                        return (None, 0)
                    return (gender, conf)

            # Leet-speak decoded variant.
            nameL = leet2eng(name)
            (gender, conf) = self.resolveFirstName(
                extractFirstName(nameL, 'direct'), country, True, script)
            if gender is not None:
                if gender == 'blacklist':
                    return (None, 0)
                return (gender, conf)

            # Unidecoded variant.
            dname = unidecode(name)
            (gender, conf) = self.resolveFirstName(
                extractFirstName(dname, 'direct'), country, True, script)
            if gender is not None:
                if gender == 'blacklist':
                    return (None, 0)
                return (gender, conf)

        return (None, 0)
Beispiel #5
0
    def resolveGenderOverall(self, name, script='Latin'):
        """Resolve ``name`` to ``(gender, confidence)`` without a country.

        ``self.initialCheckName`` is consulted first; if it yields a gender
        that pair is returned unchanged.  Otherwise a cascade of
        cross-country first-name lookups runs and the first one that yields
        an answer decides.

        Args:
            name: Full name or username.
            script: Forwarded to ``self.resolveFirstNameOverall``.

        Returns:
            ``(gender, conf)`` from the first successful lookup, or
            ``(None, 0)`` when a lookup answers 'blacklist' or nothing
            matches.
        """
        (gender, conf) = self.initialCheckName(name)
        if gender is not None:
            return (gender, conf)

        # Extract first name from name string and try cross-country.
        firstName = extractFirstName(name, 'direct')
        (gender, conf) = self.resolveFirstNameOverall(firstName, True, script)
        if gender is not None:
            if gender == 'blacklist':
                return (None, 0)
            return (gender, conf)

        # Try also the unidecoded version.
        dname = unidecode(name)
        (gender, conf) = self.resolveFirstNameOverall(
            extractFirstName(dname, 'direct'), True, script)
        if gender is not None:
            if gender == 'blacklist':
                return (None, 0)
            return (gender, conf)

        # Try also inverse names (i.e. the family-name position).
        (gender, conf) = self.resolveFirstNameOverall(
            extractFirstName(name, 'inverse'), True, script)
        if gender is not None:
            if gender == 'blacklist':
                return (None, 0)
            return (gender, conf)

        parts = name.split()

        # Try also middle names (e.g. "J. Marcos Nieto").
        if len(parts) == 3:
            (gender, conf) = self.resolveFirstNameOverall(parts[1], True,
                                                          script)
            if gender is not None:
                if gender == 'blacklist':
                    return (None, 0)
                return (gender, conf)

        # Single token: treat it as a username.
        if len(parts) == 1:
            # "bogdan": the whole username, lower-cased.
            (gender, conf) = self.resolveFirstNameOverall(name.lower(), True,
                                                          script)
            if gender is not None:
                if gender == 'blacklist':
                    return (None, 0)
                return (gender, conf)

            # "bogdanv": drop the last character.
            (gender, conf) = self.resolveFirstNameOverall(name[:-1].lower(),
                                                          True, script)
            if gender is not None:
                if gender == 'blacklist':
                    return (None, 0)
                return (gender, conf)

            # "vbogdan": drop the first character.
            (gender, conf) = self.resolveFirstNameOverall(name[1:].lower(),
                                                          True, script)
            if gender is not None:
                if gender == 'blacklist':
                    return (None, 0)
                return (gender, conf)

        # Every lookup came back empty.  Report an explicit zero confidence
        # rather than whatever value the last failed lookup left behind.
        return (None, 0)
	def resolveGender(self, name, country):
		"""Guess the gender behind a full name or username.

		A purely Cyrillic or Greek name is transliterated first.  A cascade
		of lookups then runs in order and the first one that yields an
		answer decides.  Country-specific lookups run only when ``country``
		is not None; country-independent lookups follow.

		Args:
			name: Full name or username.
			country: Country used by the country-specific lookups, or None
				to skip them.

		Returns:
			'male' or 'female' when the first word is a known gendered word;
			otherwise the label produced by the first successful lookup.
			None when the name is blank, when a first-name lookup answers
			'blacklist' (the username-prefix trick merely skips such
			answers), or when nothing matches.
		"""
		# Names written purely in Cyrillic or Greek script are transliterated.
		if only_cyrillic_chars(name) or only_greek_chars(name):
			name = unidecode(name)

		# A blank name has no first token; bail out instead of letting
		# name.split()[0] raise IndexError.
		if not name.strip():
			return None

		parts = name.split()

		# Initial check for gender-specific words at the beginning of the name.
		f = parts[0]
		if f in self.maleWords:
			return 'male'
		elif f in self.femaleWords:
			return 'female'

		# Extract first name from name string.
		firstName = extractFirstName(name, 'direct')

		if country is not None:
			# Start with suffixes: works well for Russians (can determine
			# gender based on surname suffix).
			if country in self.suffixes.keys():
				gender = self.suffixLookup(name, country)
				if gender is not None:
					return gender

			# If still no luck, try to resolve the extracted first name.
			gender = self.resolveFirstName(firstName, country, True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

			# Try the inverse order: Hungarians use reversed first/last
			# names order.
			if country in self.invOrder:
				gender = self.suffixLookup(inverseNameParts(name), country)
				if gender is not None:
					return gender

				gender = self.resolveFirstName(extractFirstName(name, 'inverse'), country, True)
				if gender is not None:
					return None if gender == 'blacklist' else gender

			# Getting desperate: assume the name is in fact a username.
			if len(parts) == 1:
				# Dutch trick: try every prefix ending just before a 'v'.
				if country in ['Belgium', 'The Netherlands', 'South Africa']:
					bestMatch = []
					for m in re.finditer('v', name):
						gender = self.resolveFirstName(name[:m.start()], country, True)
						if gender is not None and gender != 'blacklist':
							bestMatch.append(gender)
					# A decisive answer beats 'unisex'.
					gender = next((g for g in bestMatch if g != 'unisex'), None)
					if gender is not None:
						return gender
					if 'unisex' in bestMatch:
						return 'unisex'

				# "bogdanv": drop the last character.
				gender = self.resolveFirstName(name[:-1].lower(), country, True)
				if gender is not None:
					return None if gender == 'blacklist' else gender

				# "vbogdan": drop the first character.
				gender = self.resolveFirstName(name[1:].lower(), country, True)
				if gender is not None:
					return None if gender == 'blacklist' else gender

			# Leet-speak decoded variant.
			nameL = leet2eng(name)
			gender = self.resolveFirstName(extractFirstName(nameL, 'direct'), country, True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

			# Unidecoded variant.
			dname = unidecode(name)
			gender = self.resolveFirstName(extractFirstName(dname, 'direct'), country, True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

		# If everything failed, try cross-country.
		gender = self.resolveFirstNameOverall(firstName, True)
		if gender is not None:
			return None if gender == 'blacklist' else gender

		# Cross-country lookup of the unidecoded variant.
		dname = unidecode(name)
		gender = self.resolveFirstNameOverall(extractFirstName(dname, 'direct'), True)
		if gender is not None:
			return None if gender == 'blacklist' else gender

		if len(parts) == 1:
			# "bogdanv"
			gender = self.resolveFirstNameOverall(name[:-1].lower(), True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

			# "vbogdan"
			gender = self.resolveFirstNameOverall(name[1:].lower(), True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

		return None
	def tryUnidecoded(self, name):
		"""Run the cross-country lookup on the unidecoded form of ``name``.

		The name is passed through ``unidecode``, its first name is extracted
		in 'direct' order, and the result of ``self.tryCrossCountry`` for
		that first name is returned unchanged.
		"""
		ascii_first = extractFirstName(unidecode(name), 'direct')
		return self.tryCrossCountry(ascii_first)
Beispiel #8
0
	def resolveGenderByCountry(self, name, country, script = 'Latin'):
		"""Resolve ``name`` to a ``(gender, confidence)`` pair for a country.

		``self.initialCheckName`` is consulted first; if it yields a gender
		that pair is returned unchanged.  Otherwise, and only when
		``country`` is not None, a cascade of country-specific lookups runs
		and the first one that yields an answer decides.

		Args:
			name: Full name or username.
			country: Country to resolve against; None skips every lookup
				after the initial check.
			script: Forwarded to the first-name lookups.

		Returns:
			``(gender, conf)`` from the first successful lookup, or
			``(None, 0)`` when a first-name lookup answers 'blacklist' or
			nothing matches.  A 'unisex' answer from the username-prefix
			trick is reported with confidence 0.
		"""
		(gender, conf) = self.initialCheckName(name)
		if gender is not None:
			return (gender, conf)

		# Extract first name from name string.
		firstName = extractFirstName(name, 'direct')

		if country is not None:
			# Start with suffixes: works well for Russians (can determine
			# gender based on surname suffix).
			if country in self.suffixes.keys():
				(gender, conf) = self.suffixLookup(name, country)
				if gender is not None:
					return (gender, conf)

			# If still no luck, try to resolve the extracted first name.
			(gender, conf) = self.resolveFirstName(firstName, country, True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

			# Try the inverse order: Hungarians use reversed first/last
			# names order.
			if country in self.invOrder:
				(gender, conf) = self.suffixLookup(inverseNameParts(name), country)
				if gender is not None:
					return (gender, conf)

				(gender, conf) = self.resolveFirstName(extractFirstName(name, 'inverse'), country, True, script)
				if gender is not None:
					if gender == 'blacklist':
						return (None, 0)
					return (gender, conf)

			# Getting desperate: assume the name is in fact a username.
			if len(name.split()) == 1:
				# Dutch trick: try every prefix ending just before a 'v'.
				if country in ['Belgium', 'The Netherlands', 'South Africa']:
					# Keep each confidence with its own gender, so the
					# returned pair never mixes a gender from one prefix
					# with the confidence of another.
					matches = []
					for hit in re.finditer('v', name):
						(gender, conf) = self.resolveFirstName(name[:hit.start()], country, True, script)
						if gender is not None and gender != 'blacklist':
							matches.append((gender, conf))
					decisive = next((pair for pair in matches if pair[0] != 'unisex'), None)
					if decisive is not None:
						return decisive
					if matches:
						# Only 'unisex' answers were collected.
						return ('unisex', 0)

				# "bogdan": the whole username, looked up via the
				# cross-country resolver.
				(gender, conf) = self.resolveFirstNameOverall(name.lower(), True, script)
				if gender is not None:
					if gender == 'blacklist':
						return (None, 0)
					return (gender, conf)

				# "bogdanv": drop the last character.
				(gender, conf) = self.resolveFirstName(name[:-1].lower(), country, True, script)
				if gender is not None:
					if gender == 'blacklist':
						return (None, 0)
					return (gender, conf)

				# "vbogdan": drop the first character.
				(gender, conf) = self.resolveFirstName(name[1:].lower(), country, True, script)
				if gender is not None:
					if gender == 'blacklist':
						return (None, 0)
					return (gender, conf)

			# Leet-speak decoded variant.
			nameL = leet2eng(name)
			(gender, conf) = self.resolveFirstName(extractFirstName(nameL, 'direct'), country, True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

			# Unidecoded variant.
			dname = unidecode(name)
			(gender, conf) = self.resolveFirstName(extractFirstName(dname, 'direct'), country, True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

		return (None, 0)
Beispiel #9
0
	def resolveGenderOverall(self, name, script= 'Latin'):
		"""Resolve ``name`` to ``(gender, confidence)`` without a country.

		``self.initialCheckName`` is consulted first; if it yields a gender
		that pair is returned unchanged.  Otherwise a cascade of
		cross-country first-name lookups runs and the first one that yields
		an answer decides.

		Args:
			name: Full name or username.
			script: Forwarded to ``self.resolveFirstNameOverall``.

		Returns:
			``(gender, conf)`` from the first successful lookup, or
			``(None, 0)`` when a lookup answers 'blacklist' or nothing
			matches.
		"""
		(gender, conf) = self.initialCheckName(name)
		if gender is not None:
			return (gender, conf)

		# Extract first name from name string and try cross-country.
		firstName = extractFirstName(name, 'direct')
		(gender, conf) = self.resolveFirstNameOverall(firstName, True, script)
		if gender is not None:
			if gender == 'blacklist':
				return (None, 0)
			return (gender, conf)

		# Try also the unidecoded version.
		dname = unidecode(name)
		(gender, conf) = self.resolveFirstNameOverall(extractFirstName(dname, 'direct'), True, script)
		if gender is not None:
			if gender == 'blacklist':
				return (None, 0)
			return (gender, conf)

		# Try also inverse names (i.e. the family-name position).
		(gender, conf) = self.resolveFirstNameOverall(extractFirstName(name, 'inverse'), True, script)
		if gender is not None:
			if gender == 'blacklist':
				return (None, 0)
			return (gender, conf)

		parts = name.split()

		# Try also middle names (e.g. "J. Marcos Nieto").
		if len(parts) == 3:
			(gender, conf) = self.resolveFirstNameOverall(parts[1], True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

		# Single token: treat it as a username.
		if len(parts) == 1:
			# "bogdan": the whole username, lower-cased.
			(gender, conf) = self.resolveFirstNameOverall(name.lower(), True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

			# "bogdanv": drop the last character.
			(gender, conf) = self.resolveFirstNameOverall(name[:-1].lower(), True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

			# "vbogdan": drop the first character.
			(gender, conf) = self.resolveFirstNameOverall(name[1:].lower(), True, script)
			if gender is not None:
				if gender == 'blacklist':
					return (None, 0)
				return (gender, conf)

		# Every lookup came back empty.  Report an explicit zero confidence
		# rather than whatever value the last failed lookup left behind.
		return (None, 0)
Beispiel #10
0
 def tryUnidecoded(self, name):
     """Run the cross-country lookup on the unidecoded form of ``name``.

     The name is passed through ``unidecode``, its first name is extracted
     in 'direct' order, and the result of ``self.tryCrossCountry`` for that
     first name is returned unchanged.
     """
     ascii_first = extractFirstName(unidecode(name), 'direct')
     return self.tryCrossCountry(ascii_first)