class Index:
    """
    Lazy index of where each initial first appears in the country
    data file.

    The index maps the first character of a line to the position in
    bytes of the first line that starts with that character.
    The index is stored in an OrderedDict.
    It's a lazy index, the index is not generated
    till the first call.
    """
    def __init__(self, country='us'):
        # Stays an empty container until the first lookup builds the index.
        self.indices = []
        self.country = Country(country)

    def __call__(self, letter):
        """
        Return the offset recorded for ``letter`` (upper-cased first).

        Raises KeyError when no line of the data file starts with it.
        """
        if not self.indices:
            self._generate_index()
        return self.indices[letter.upper()]

    def _generate_index(self):
        """
        Scan the data file once and record the offset of the first line
        seen for every distinct leading character.
        """
        self.indices = OrderedDict()
        # newline='' leaves line endings untranslated, so the encoded
        # length of each line equals the bytes it occupies in the file.
        # Counting decoded characters instead would drift on CRLF files
        # and on names containing multi-byte characters.
        with open(self.country.file(), newline='') as handle:
            encoding = handle.encoding
            # The header line is skipped but still counts towards offsets.
            offset = len(handle.readline().encode(encoding))
            for line in handle:
                initial = line[0]
                if initial not in self.indices:
                    self.indices[initial] = offset
                # Keep a running total instead of concatenating the
                # whole file into one string just to measure it.
                offset += len(line.encode(encoding))
class GenderDetector:
    """
    Guess the gender associated with a first name using a country's
    CSV data file.

    A lookup jumps to the offset the Index reports for the name's
    initial and scans rows from there until the first column equals
    the normalised name.
    """
    def __init__(self, country='us', unknown_value='unknown'):
        self.index = Index(country)
        self.country = Country(country)
        # Returned whenever no 'male'/'female' answer can be given.
        self.unknown_value = unknown_value

    def guess(self, name):
        """
        Return 'male' or 'female' for ``name``, or ``unknown_value``
        when the name is empty, is not found, or the country data gives
        any other answer.
        """
        name = self._format_name(name)
        if not name:
            # Nothing left after stripping, so there is no initial to look up.
            return self.unknown_value

        try:
            initial_position = self.index(name[0])
        except KeyError:
            # The index has no entry for this initial.
            return self.unknown_value

        # newline='' is what the csv module asks for on file objects.
        with open(self.country.file(), newline='') as csvfile:
            csvfile.seek(initial_position)
            for row in csv.reader(csvfile):
                # Blank lines are parsed as empty rows; skip them.
                if row and row[0] == name:
                    return self._guess(row)
        return self.unknown_value

    def _guess(self, row):
        """Map the country's verdict for ``row`` onto the allowed answers."""
        gender = self.country.guess(row)
        if gender in ('male', 'female'):
            return gender
        return self.unknown_value

    def _format_name(self, name):
        """
        Strip ``name`` and return it with an upper-case first character
        and a lower-case remainder; an empty name yields ''.
        """
        name = name.strip()
        # Slicing (not indexing) keeps an empty name from raising.
        # The inner strip() is kept on purpose: it also removes
        # whitespace directly after the first character.
        return name[:1].upper() + name[1:].lower().strip()
class GenderDetector:
    """
    Look up the gender recorded for a first name in a country's CSV
    data file.

    The Index supplies the file offset for the name's initial; rows are
    then read from that offset until one whose first column equals the
    normalised name is found.
    """
    def __init__(self, country='us', unknown_value='unknown'):
        self.index = Index(country)
        self.country = Country(country)
        # Fallback answer for anything that is not 'male' or 'female'.
        self.unknown_value = unknown_value

    def guess(self, name):
        """
        Return 'male' or 'female' for ``name``.

        ``unknown_value`` is returned instead when the name is empty
        after stripping, when no row matches it, or when the country
        data yields any other answer.
        """
        name = self._format_name(name)
        if not name:
            # An empty name has no initial to look up.
            return self.unknown_value

        try:
            initial_position = self.index(name[0])
        except KeyError:
            # No line of the data file starts with this initial.
            return self.unknown_value

        # The csv module expects file objects opened with newline=''.
        with open(self.country.file(), newline='') as csvfile:
            csvfile.seek(initial_position)
            reader = csv.reader(csvfile)
            for row in reader:
                if not row:
                    # Blank line in the data file.
                    continue
                if row[0] == name:
                    return self._guess(row)

        return self.unknown_value

    def _guess(self, row):
        """Return the country's answer for ``row`` if it is an allowed one."""
        gender = self.country.guess(row)
        if gender in ('male', 'female'):
            return gender
        return self.unknown_value

    def _format_name(self, name):
        """
        Normalise ``name``: strip it, upper-case the first character and
        lower-case the rest. Returns '' for an empty name.
        """
        name = name.strip()
        # name[:1] is '' for an empty string, where name[0] would raise.
        # The trailing strip() also drops whitespace that directly
        # follows the first character, so it must stay.
        return name[:1].upper() + name[1:].lower().strip()
# Example no. 4
# 0
class Index:
    """
    Generate index based on the position in bytes
    of the first line starting with every letter found in the
    country data file.
    The index is stored in an OrderedDict.
    It's a lazy index, the index is not generated
    till the first call.
    """
    def __init__(self, country='us'):
        # Empty until the first lookup triggers _generate_index().
        self.indices = []
        self.country = Country(country)

    def __call__(self, letter):
        """
        Return the offset stored for the upper-cased ``letter``.

        Raises KeyError when the data file has no line starting with it.
        """
        if not self.indices:
            self._generate_index()
        return self.indices[letter.upper()]

    def _generate_index(self):
        """
        Read the data file once, storing for each leading character the
        offset of the first line that begins with it.
        """
        self.indices = OrderedDict()
        # Line endings are left untranslated (newline='') and lengths
        # are measured on the re-encoded line, so offsets are byte
        # positions even for CRLF files or multi-byte characters.
        with open(self.country.file(), newline='') as handle:
            encoding = handle.encoding
            header = handle.readline()  # Omit headers line
            position = len(header.encode(encoding))
            for line in handle:
                # setdefault keeps only the first offset seen per initial.
                self.indices.setdefault(line[0], position)
                # An integer running total replaces accumulating the
                # whole file contents in a string.
                position += len(line.encode(encoding))