Esempio n. 1
0
    def genderDotCLookup(self,
                         firstName,
                         country,
                         strict=True,
                         simplified=True):
        """Look up the gender of a first name in ``self.genderDict``.

        Two answers are derived from the dictionary entry of the name:

        * an overall answer, from the frequencies summed over every
          column of every ``[label, frequencies]`` pair of the entry;
        * a country-specific answer, read from the single column
          ``self.countriesOrder[country]``; it exists only when exactly
          one pair has a frequency recorded in that column.

        A frequency counts as "recorded" when, once stripped, it is a
        single character; that character is parsed as a hexadecimal digit.

        Args:
            firstName: Name to look up; lower-cased before the lookup.
            country: Country name; passed through ``normaliseCountryName``
                before being matched against ``self.countriesOrder``.
            strict: If true, only the country-specific answer is returned
                (``None`` when there is none). If false, the overall
                answer is returned.
            simplified: Forwarded unchanged to ``formatOutput``.

        Returns:
            The result of ``formatOutput(gender, simplified)``, where
            ``gender`` is 'male', 'mostly male', 'female',
            'mostly female', 'unisex' or ``None``. Any error while reading
            the dictionary entry (unknown name, unknown label, malformed
            frequency, ...) yields ``None`` for both answers.
        """
        gender = None
        genderCountry = None
        country = normaliseCountryName(country)

        # Label of a dictionary row -> bucket used for the overall tally.
        # A label missing from this table (e.g. '=') raises KeyError as soon
        # as one of its frequencies is tallied, which voids the lookup.
        bucketOf = {
            'M': 'mmale', '1M': 'mmale', '?M': 'mmale',
            'F': 'mfemale', '1F': 'mfemale', '?F': 'mfemale',
            '?': 'uni',
        }
        # Label of a dictionary row -> country-specific answer.
        countryAnswerOf = {
            'M': 'male', '1M': 'mostly male', '?M': 'mostly male',
            'F': 'female', '1F': 'mostly female', '?F': 'mostly female',
            '?': 'unisex',
        }

        try:
            nameData = self.genderDict[firstName.lower()]

            # Overall answer: add up every recorded frequency per bucket.
            totals = {'mmale': 0.0, 'mfemale': 0.0, 'uni': 0.0}
            for mf, frequencies in nameData:
                for hexFreq in frequencies:
                    if len(hexFreq.strip()) == 1:
                        totals[bucketOf[mf]] += int(hexFreq, 16)

            # A lead larger than `thr` is a firm answer, a smaller lead is
            # a "mostly" answer and a tie is unisex.
            thr = 256
            lead = totals['mmale'] - totals['mfemale']
            if lead > thr:
                gender = 'male'
            elif lead > 0:
                gender = 'mostly male'
            elif lead < -thr:
                gender = 'female'
            elif lead < 0:
                gender = 'mostly female'
            else:
                gender = 'unisex'

            # Country-specific answer: only for countries that have a
            # column, and only when a single label is recorded there.
            if country in self.countriesOrder:
                column = self.countriesOrder[country]
                labelsInCountry = [
                    mf for mf, frequencies in nameData
                    if len(frequencies[column].strip()) == 1
                ]
                if len(labelsInCountry) == 1:
                    # Labels outside the table leave the answer as None.
                    genderCountry = countryAnswerOf.get(labelsInCountry[0])
        except Exception:
            # Best-effort lookup: bad or missing data means "unknown".
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            gender = None
            genderCountry = None

        if strict:
            gender = genderCountry
        return formatOutput(gender, simplified)
Esempio n. 2
0
	def genderDotCLookup(self, firstName, country, strict=True, simplified=True):
		"""Look up the gender of a first name in ``self.genderDict``.

		Two answers are derived from the dictionary entry of the name:

		* an overall answer, from the frequencies summed over every
		  column of every ``[label, frequencies]`` pair of the entry;
		* a country-specific answer, read from the single column
		  ``self.countriesOrder[country]``; it exists only when exactly
		  one pair has a frequency recorded in that column.

		A frequency counts as "recorded" when, once stripped, it is a
		single character; that character is parsed as a hexadecimal digit.

		Args:
			firstName: Name to look up; lower-cased before the lookup.
			country: Country name; passed through ``normaliseCountryName``
				before being matched against ``self.countriesOrder``.
			strict: If true, only the country-specific answer is returned
				(``None`` when there is none). If false, the overall
				answer is returned.
			simplified: Forwarded unchanged to ``formatOutput``.

		Returns:
			The result of ``formatOutput(gender, simplified)``, where
			``gender`` is 'male', 'mostly male', 'female',
			'mostly female', 'unisex' or ``None``. Any error while reading
			the dictionary entry (unknown name, unknown label, malformed
			frequency, ...) yields ``None`` for both answers.
		"""
		gender = None
		genderCountry = None
		country = normaliseCountryName(country)

		# Label of a dictionary row -> bucket used for the overall tally.
		# A label missing from this table (e.g. '=') raises KeyError as soon
		# as one of its frequencies is tallied, which voids the lookup.
		bucketOf = {
			'M': 'mmale', '1M': 'mmale', '?M': 'mmale',
			'F': 'mfemale', '1F': 'mfemale', '?F': 'mfemale',
			'?': 'uni',
		}
		# Label of a dictionary row -> country-specific answer.
		countryAnswerOf = {
			'M': 'male', '1M': 'mostly male', '?M': 'mostly male',
			'F': 'female', '1F': 'mostly female', '?F': 'mostly female',
			'?': 'unisex',
		}

		try:
			nameData = self.genderDict[firstName.lower()]

			# Overall answer: add up every recorded frequency per bucket.
			totals = {'mmale': 0.0, 'mfemale': 0.0, 'uni': 0.0}
			for mf, frequencies in nameData:
				for hexFreq in frequencies:
					if len(hexFreq.strip()) == 1:
						totals[bucketOf[mf]] += int(hexFreq, 16)

			# A lead larger than `thr` is a firm answer, a smaller lead is
			# a "mostly" answer and a tie is unisex.
			thr = 256
			lead = totals['mmale'] - totals['mfemale']
			if lead > thr:
				gender = 'male'
			elif lead > 0:
				gender = 'mostly male'
			elif lead < -thr:
				gender = 'female'
			elif lead < 0:
				gender = 'mostly female'
			else:
				gender = 'unisex'

			# Country-specific answer: only for countries that have a
			# column, and only when a single label is recorded there.
			if country in self.countriesOrder:
				column = self.countriesOrder[country]
				labelsInCountry = [
					mf for mf, frequencies in nameData
					if len(frequencies[column].strip()) == 1
				]
				if len(labelsInCountry) == 1:
					# Labels outside the table leave the answer as None.
					genderCountry = countryAnswerOf.get(labelsInCountry[0])
		except Exception:
			# Best-effort lookup: bad or missing data means "unknown".
			# KeyboardInterrupt and SystemExit are deliberately not caught.
			gender = None
			genderCountry = None

		if strict:
			gender = genderCountry
		return formatOutput(gender, simplified)