class Index:
    """Lazy index of where each initial character first appears in a file.

    Maps the first character of every data line of the country file to the
    byte offset of the first line that starts with that character, so a
    reader can ``seek`` straight to it. The mapping is stored in an
    ``OrderedDict`` and is not generated until the first lookup.
    """

    def __init__(self, country='us'):
        # Empty placeholder; replaced by an OrderedDict on the first lookup.
        self.indices = []
        self.country = Country(country)

    def __call__(self, letter):
        """Return the byte offset of the first line starting with *letter*.

        The letter is upper-cased before the lookup. The index is built on
        the first call (and again on later calls while it is still empty).

        Raises:
            KeyError: if no indexed line starts with the upper-cased letter.
        """
        if not self.indices:
            self._generate_index()
        return self.indices[letter.upper()]

    def _generate_index(self):
        """Scan the country file once and record first-occurrence offsets."""
        indices = OrderedDict()
        with open(self.country.file()) as file:
            # Offsets are later handed to seek(), which interprets a plain
            # integer as a byte position. Counting decoded characters would
            # undercount for multi-byte encodings and for translated CRLF
            # newlines, so measure the raw bytes of the underlying buffer
            # and decode only to obtain the key character.
            encoding = file.encoding
            raw = file.buffer
            offset = len(raw.readline())  # Omit headers line
            for raw_line in raw:
                initial = raw_line.decode(encoding)[0]
                if initial not in indices:
                    indices[initial] = offset
                # Keep a running integer instead of concatenating the whole
                # file into one string just to take its length.
                offset += len(raw_line)
        # Publish only a fully built index so a failure part-way through
        # does not leave a partial mapping that is never regenerated.
        self.indices = indices
class GenderDetector:
    """Look up the gender associated with a first name in a country's CSV.

    Uses an ``Index`` to jump to the first row whose name starts with the
    same initial, then scans rows from there for an exact name match.
    """

    def __init__(self, country='us', unknown_value='unknown'):
        self.index = Index(country)
        self.country = Country(country)
        # Value returned whenever no 'male'/'female' answer can be given.
        self.unknown_value = unknown_value

    def guess(self, name):
        """Return 'male', 'female' or ``unknown_value`` for *name*.

        The name is normalised with ``_format_name`` before the lookup.
        ``unknown_value`` is returned when the name is empty or only
        whitespace, when no indexed line starts with its initial, when no
        row matches, or when the matched row does not resolve to
        'male'/'female'.
        """
        name = self._format_name(name)
        if not name:
            # Nothing to look up; avoids indexing into an empty string.
            return self.unknown_value

        try:
            initial_position = self.index(name[0])
        except KeyError:
            # No line in the file starts with this initial.
            return self.unknown_value

        # newline='' is what the csv module requires of file objects it
        # reads, so it can do its own newline handling.
        with open(self.country.file(), newline='') as csvfile:
            csvfile.seek(initial_position)
            for row in csv.reader(csvfile):
                # Blank lines are yielded as empty rows; skip them.
                if row and row[0] == name:
                    return self._guess(row)

        return self.unknown_value

    def _guess(self, row):
        """Map the country's verdict for *row* onto the public result set."""
        gender = self.country.guess(row)
        return gender if gender in ('male', 'female') else self.unknown_value

    def _format_name(self, name):
        """Strip *name* and return it with only the first letter upper-cased.

        Returns an empty string when nothing is left after stripping.
        """
        name = name.strip()
        if not name:
            return name
        # The inner strip is kept for compatibility: it also removes
        # whitespace that directly follows the first character.
        return name[0].upper() + name[1:].lower().strip()