예제 #1
0
	def resolveGender(self, name, country):
		"""Resolve a gender for ``name`` by trying a chain of fallback lookups.

		The lookups run in a fixed order and the first one that yields a
		non-None result wins:

		1. ``tryCrossCountry`` on the first name extracted in 'direct' mode,
		2. ``tryUnidecoded`` on the full name,
		3. ``tryRemovingFirstAndLastLetters`` on the full name,
		4. ``tryRemovingFirstAndLastLetters`` again after removing all digits.

		Args:
			name: Name string to resolve.
			country: Not used by this implementation.

		Returns:
			The result of the first lookup that is not None, or None when
			every lookup returns None.
		"""
		# Names written purely in Cyrillic or Greek script are transliterated
		# before any lookup is attempted.
		if only_cyrillic_chars(name) or only_greek_chars(name):
			name = unidecode(name)

		firstName = extractFirstName(name, 'direct')

		gender = self.tryCrossCountry(firstName)
		if gender is not None:
			return gender

		gender = self.tryUnidecoded(name)
		if gender is not None:
			return gender

		gender = self.tryRemovingFirstAndLastLetters(name)
		if gender is not None:
			return gender

		# Raw string: "\d" in a plain literal is an invalid escape sequence
		# and triggers a warning on recent Python versions.
		name = re.sub(r"\d+", "", name)

		# Last attempt; its result (possibly None) is the final answer.
		return self.tryRemovingFirstAndLastLetters(name)
예제 #2
0
    def resolveGender(self, name, country):
        """Resolve a gender for ``name`` by trying a chain of fallback lookups.

        The lookups run in a fixed order and the first one that yields a
        non-None result wins:

        1. ``tryCrossCountry`` on the first name extracted in 'direct' mode,
        2. ``tryUnidecoded`` on the full name,
        3. ``tryRemovingFirstAndLastLetters`` on the full name,
        4. ``tryRemovingFirstAndLastLetters`` again after removing all digits.

        Args:
            name: Name string to resolve.
            country: Not used by this implementation.

        Returns:
            The result of the first lookup that is not None, or None when
            every lookup returns None.
        """
        # Names written purely in Cyrillic or Greek script are transliterated
        # before any lookup is attempted.
        if only_cyrillic_chars(name) or only_greek_chars(name):
            name = unidecode(name)

        firstName = extractFirstName(name, 'direct')

        gender = self.tryCrossCountry(firstName)
        if gender is not None:
            return gender

        gender = self.tryUnidecoded(name)
        if gender is not None:
            return gender

        gender = self.tryRemovingFirstAndLastLetters(name)
        if gender is not None:
            return gender

        # Raw string: "\d" in a plain literal is an invalid escape sequence
        # and triggers a warning on recent Python versions.
        name = re.sub(r"\d+", "", name)

        # Last attempt; its result (possibly None) is the final answer.
        return self.tryRemovingFirstAndLastLetters(name)
예제 #3
0
 def initialCheckName(self, name):
     """Look for a gender-specific word in the first two tokens of ``name``.

     Args:
         name: Name string; it is split on whitespace into tokens.

     Returns:
         ``('male', 1)`` or ``('female', 1)`` when the first token, or
         failing that the second token, is found in ``self.maleWords`` or
         ``self.femaleWords`` (male is checked first). ``(None, 0)`` when
         neither token matches or the name is blank.
     """
     # Blank input has no tokens; indexing name.split()[0] on it used to
     # raise IndexError.
     if not name or not name.strip():
         return (None, 0)

     # Transliterate names written purely in Cyrillic or Greek script.
     if only_cyrillic_chars(name) or only_greek_chars(name):
         name = unidecode(name)

     # Only the first two whitespace-separated tokens are inspected, in
     # order, so a match on the first token takes precedence.
     for token in name.split()[:2]:
         if token in self.maleWords:
             return ('male', 1)
         if token in self.femaleWords:
             return ('female', 1)
     return (None, 0)
예제 #4
0
	def initialCheckName(self, name):
		"""Look for a gender-specific word in the first two tokens of ``name``.

		Args:
			name: Name string; it is split on whitespace into tokens.

		Returns:
			``('male', 1)`` or ``('female', 1)`` when the first token, or
			failing that the second token, is found in ``self.maleWords`` or
			``self.femaleWords`` (male is checked first). ``(None, 0)`` when
			neither token matches or the name is blank.
		"""
		# Blank input has no tokens; indexing name.split()[0] on it used to
		# raise IndexError.
		if not name or not name.strip():
			return (None, 0)

		# Transliterate names written purely in Cyrillic or Greek script.
		if only_cyrillic_chars(name) or only_greek_chars(name):
			name = unidecode(name)

		# Only the first two whitespace-separated tokens are inspected, in
		# order, so a match on the first token takes precedence.
		for token in name.split()[:2]:
			if token in self.maleWords:
				return ('male', 1)
			if token in self.femaleWords:
				return ('female', 1)
		return (None, 0)
예제 #5
0
    def resolveGender(self, name, country):
        """Infer a gender label for ``name``, using ``country`` when given.

        Strategies are attempted in a fixed order and the first usable answer
        is returned. Whenever ``resolveFirstName`` or
        ``resolveFirstNameOverall`` answers ``'blacklist'``, resolution stops
        and None is returned.

        Args:
            name: Name (or username) string to resolve.
            country: Country used for the country-specific lookups, or None
                to go straight to the cross-country lookups.

        Returns:
            ``'male'`` or ``'female'`` when the first token is a known
            gender-specific word; otherwise the value produced by the first
            lookup that succeeds (the Dutch heuristic may yield
            ``'unisex'``); None when the name is blank, blacklisted or
            unresolved.
        """
        # Names written purely in Cyrillic or Greek script are transliterated.
        if only_cyrillic_chars(name) or only_greek_chars(name):
            name = unidecode(name)

        # A blank string cannot be resolved.
        if not name.strip():
            return None

        # Gender-specific word at the very beginning of the name.
        tokens = name.split()
        leadWord = tokens[0]
        if leadWord in self.maleWords:
            return 'male'
        if leadWord in self.femaleWords:
            return 'female'

        isSingleToken = len(tokens) == 1
        givenName = extractFirstName(name, 'direct')

        if country is not None:
            # Suffix lookup first: works well for Russians, whose surname
            # suffix can determine gender.
            if country in self.suffixes.keys():
                gender = self.suffixLookup(name, country)
                if gender is not None:
                    return gender

            # Country-specific lookup of the extracted first name.
            gender = self.resolveFirstName(givenName, country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

            # Some countries (e.g. Hungary) put the family name first, so
            # retry with the name parts inverted.
            if country in self.invOrder:
                gender = self.suffixLookup(inverseNameParts(name), country)
                if gender is not None:
                    return gender

                invertedFirst = extractFirstName(name, 'inverse')
                gender = self.resolveFirstName(invertedFirst, country, True)
                if gender is not None:
                    return None if gender == 'blacklist' else gender

            # A single token may really be a username; try a few tricks.
            if isSingleToken:
                # Dutch trick: treat everything before each 'v' as a
                # candidate first name.
                if country in ['Belgium', 'The Netherlands', 'South Africa']:
                    candidates = []
                    for match in re.finditer('v', name):
                        guess = self.resolveFirstName(
                            name[:match.start()], country, True)
                        if guess is not None and guess != 'blacklist':
                            candidates.append(guess)
                    # Prefer the first definite answer over 'unisex'.
                    for guess in candidates:
                        if guess != 'unisex':
                            return guess
                    if 'unisex' in candidates:
                        return 'unisex'

                # Usernames shaped like "bogdanv": drop the last character.
                gender = self.resolveFirstName(
                    name[:-1].lower(), country, True)
                if gender is not None:
                    return None if gender == 'blacklist' else gender

                # Usernames shaped like "vbogdan": drop the first character.
                gender = self.resolveFirstName(
                    name[1:].lower(), country, True)
                if gender is not None:
                    return None if gender == 'blacklist' else gender

            # Leetspeak converted back to plain letters.
            leetName = leet2eng(name)
            gender = self.resolveFirstName(
                extractFirstName(leetName, 'direct'), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

            # Unidecoded version of the name.
            asciiName = unidecode(name)
            gender = self.resolveFirstName(
                extractFirstName(asciiName, 'direct'), country, True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        # Country-specific attempts failed (or no country): go cross-country.
        gender = self.resolveFirstNameOverall(givenName, True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        # Cross-country lookup of the unidecoded version.
        asciiName = unidecode(name)
        gender = self.resolveFirstNameOverall(
            extractFirstName(asciiName, 'direct'), True)
        if gender is not None:
            return None if gender == 'blacklist' else gender

        if isSingleToken:
            # "bogdanv": drop the last character.
            gender = self.resolveFirstNameOverall(name[:-1].lower(), True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

            # "vbogdan": drop the first character.
            gender = self.resolveFirstNameOverall(name[1:].lower(), True)
            if gender is not None:
                return None if gender == 'blacklist' else gender

        return None
예제 #6
0
	def resolveGender(self, name, country):
		"""Resolve a gender label for ``name``, using ``country`` when given.

		Strategies are attempted in a fixed order and the first usable answer
		is returned. Whenever ``resolveFirstName`` or
		``resolveFirstNameOverall`` answers ``'blacklist'``, resolution stops
		and None is returned.

		Args:
			name: Name (or username) string to resolve.
			country: Country used for the country-specific lookups, or None
				to go straight to the cross-country lookups.

		Returns:
			``'male'`` or ``'female'`` when the first token is a known
			gender-specific word; otherwise the value produced by the first
			lookup that succeeds (the Dutch heuristic may yield
			``'unisex'``); None when the name is blank, blacklisted or
			unresolved.
		"""
		# Blank (or missing) input cannot be resolved. Without this guard the
		# first-token lookup below raised IndexError on '' or on
		# whitespace-only names.
		if not name or not name.strip():
			return None

		# Transliterate names written purely in Cyrillic or Greek script.
		if only_cyrillic_chars(name) or only_greek_chars(name):
			name = unidecode(name)

		tokens = name.split()
		if not tokens:
			# Transliteration left nothing to inspect.
			return None
		singleToken = len(tokens) == 1

		# Gender-specific word at the very beginning of the name.
		if tokens[0] in self.maleWords:
			return 'male'
		if tokens[0] in self.femaleWords:
			return 'female'

		firstName = extractFirstName(name, 'direct')

		if country is not None:
			# Suffix lookup first: works well for Russians, whose surname
			# suffix can determine gender.
			if country in self.suffixes:
				gender = self.suffixLookup(name, country)
				if gender is not None:
					return gender

			# Country-specific lookup of the extracted first name.
			gender = self.resolveFirstName(firstName, country, True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

			# Some countries (e.g. Hungary) put the family name first, so
			# retry with the name parts inverted.
			if country in self.invOrder:
				gender = self.suffixLookup(inverseNameParts(name), country)
				if gender is not None:
					return gender

				gender = self.resolveFirstName(extractFirstName(name, 'inverse'), country, True)
				if gender is not None:
					return None if gender == 'blacklist' else gender

			# A single token may really be a username; try a few tricks.
			if singleToken:
				# Dutch trick: treat everything before each 'v' as a
				# candidate first name.
				if country in ('Belgium', 'The Netherlands', 'South Africa'):
					bestMatch = []
					for m in re.finditer('v', name):
						gender = self.resolveFirstName(name[:m.start()], country, True)
						if gender is not None and gender != 'blacklist':
							bestMatch.append(gender)
					# Prefer the first definite answer over 'unisex'.
					gender = next((g for g in bestMatch if g != 'unisex'), None)
					if gender is not None:
						return gender
					if 'unisex' in bestMatch:
						return 'unisex'

				# Usernames shaped like "bogdanv": drop the last character.
				gender = self.resolveFirstName(name[:-1].lower(), country, True)
				if gender is not None:
					return None if gender == 'blacklist' else gender

				# Usernames shaped like "vbogdan": drop the first character.
				gender = self.resolveFirstName(name[1:].lower(), country, True)
				if gender is not None:
					return None if gender == 'blacklist' else gender

			# Leetspeak converted back to plain letters.
			nameL = leet2eng(name)
			gender = self.resolveFirstName(extractFirstName(nameL, 'direct'), country, True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

			# Unidecoded version of the name.
			dname = unidecode(name)
			gender = self.resolveFirstName(extractFirstName(dname, 'direct'), country, True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

		# Country-specific attempts failed (or no country): go cross-country.
		gender = self.resolveFirstNameOverall(firstName, True)
		if gender is not None:
			return None if gender == 'blacklist' else gender

		# Cross-country lookup of the unidecoded version.
		dname = unidecode(name)
		gender = self.resolveFirstNameOverall(extractFirstName(dname, 'direct'), True)
		if gender is not None:
			return None if gender == 'blacklist' else gender

		if singleToken:
			# "bogdanv": drop the last character.
			gender = self.resolveFirstNameOverall(name[:-1].lower(), True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

			# "vbogdan": drop the first character.
			gender = self.resolveFirstNameOverall(name[1:].lower(), True)
			if gender is not None:
				return None if gender == 'blacklist' else gender

		return None