Ejemplo n.º 1
0
    def pluralize(self, word):
        '''
        Return the plural form of an English noun.

        The word is lower-cased first, so the result is always lower-case.
        Resolution order:

        1. ``self.cache`` -- an entry whose key or value equals the word
           yields that entry's value (so an already-plural cached form
           maps to itself).
        2. Nouns whose plural equals the singular are returned unchanged.
        3. The irregular table; either the singular or the plural form
           may be passed in.
        4. The ordered list of regular-expression rules; the first rule
           that matches is applied.

        Results from steps 2-4 are stored in ``self.cache``.

        :param word: noun to pluralize (string).
        :returns: the lower-cased plural form.
        '''
        rules = {
            # Ordered [pattern, replacement] pairs; first match wins, so
            # specific endings must precede the generic ones at the bottom.
            # Inline flags must lead the pattern: Python 3.11+ raises
            # re.error for a flag placed after the anchor ('^(?i)...').
            'regular':
            [['(?i)(on)$', 'a'], ['(?i)(alumn|alg)a$', '\\1ae'],
             ['(?i)([ti])um$', '\\1a'], ['(?i)(ndum)$', 'nda'],
             ['(?i)(gen|visc)us$', '\\1era'], ['(?i)(corp)us$', '\\1ora'],
             ['(?i)(octop|vir|alumn|bacill|cact|foc|fung)us$', '\\1i'],
             ['(?i)(loc|nucle|radi|stimul|styl|succub)us$', '\\1i'],
             ['(?i)(syllab|termin|tor)us$', '\\1i'], ['(?i)(us)$', '\\1es'],
             ['(?i)(matr|vert|ind)(ix|ex)$', '\\1ices'],
             ['(?i)([m|l])ouse$', '\\1ice'], ['(?i)(hive)$', '\\1s'],
             ['(?i)(s|t|x)is$', '\\1es'], ['(?i)^(ox)$', '\\1en'],
             ['(?i)(quiz)$', '\\1zes'],
             ['(?i)(?:([^f])fe|([aelor])f)$', '\\1\\2ves'],
             ['(?i)(([p|m]atriar|monar|stoma|con|epo)ch)$', '\\1s'],
             ['(?i)(x|ch|s|ss|sh|z)$',
              '\\1es'], ['(?i)([^aeiouy]o)$', '\\1es'],
             ['(?i)([^aeiouy]|qu)y$', '\\1ies'], ['(?i)$', 's']],
            'irregular': {
                'albino': 'albinos',
                'armadillo': 'armadillos',
                'auto': 'autos',
                'cello': 'cellos',
                'chief': 'chiefs',
                'child': 'children',
                'combo': 'combos',
                'ego': 'egos',
                'foot': 'feet',
                'goose': 'geese',
                'halo': 'halos',
                'inferno': 'infernos',
                'lasso': 'lassos',
                'man': 'men',
                'memento': 'mementos',
                'memo': 'memos',
                'person': 'people',
                'piano': 'pianos',
                'photo': 'photos',
                'pro': 'pros',
                'safe': 'safes',
                'sex': 'sexes',
                'silo': 'silos',
                'solo': 'solos',
                'staff': 'staves',
                'taco': 'tacos',
                'tooth': 'teeth',
                'tuxedo': 'tuxedos',
                'typo': 'typos',
                'veto': 'vetos',
                'yo': 'yos'
            },
            # Despite the key name, these are the nouns that are returned
            # unchanged (plural identical to singular).
            'countable': [
                'aircraft', 'cannon', 'deer', 'elk', 'equipment', 'fish',
                'glasses', 'information', 'money', 'moose', 'news', 'pants',
                'pliers', 'politics', 'rice', 'savings', 'scissors', 'series',
                'sheep', 'species', 'swine'
            ]
        }

        word = word.lower()

        # A cached entry answers for both its singular (key) and its
        # plural (value), hence the scan instead of a plain key lookup.
        for key, value in self.cache.items():
            if word == key or word == value:
                return value

        if word in rules['countable']:
            self.cache[word] = word
            return word

        for key, value in rules['irregular'].items():
            if word == key or word == value:
                self.cache[key] = value
                return value

        for pattern, replacement in rules['regular']:
            match = re.search(pattern, word, re.IGNORECASE)
            if not match:
                continue
            # Drop back-references to groups that took no part in the
            # match (e.g. one side of an alternation) so the substitution
            # also works on interpreters that reject unmatched groups.
            for index, group in enumerate(match.groups(), 1):
                if group is None:
                    replacement = replacement.replace('\\' + str(index), '')
            self.cache[word] = re.sub(pattern, replacement, word)
            return self.cache[word]

        # Defensive fallback only: the final catch-all rule matches every
        # string, so this is not expected to be reached.
        return Base.pluralize(self, word)
Ejemplo n.º 2
0
    def pluralize(self, word):
        '''
        Return the plural form of an English noun.

        The word is lower-cased first, so the result is always lower-case.
        Resolution order:

        1. ``self.cache`` -- an entry whose key or value equals the word
           yields that entry's value (so an already-plural cached form
           maps to itself).
        2. Nouns whose plural equals the singular are returned unchanged.
        3. The irregular table; either the singular or the plural form
           may be passed in.
        4. The ordered list of regular-expression rules; the first rule
           that matches is applied.

        Results from steps 2-4 are stored in ``self.cache``.

        :param word: noun to pluralize (string).
        :returns: the lower-cased plural form.
        '''
        rules = {
            # Ordered [pattern, replacement] pairs; first match wins, so
            # specific endings must precede the generic ones at the bottom.
            'regular': [
                ['(?i)(on)$', 'a'],
                ['(?i)(alumn|alg)a$', '\\1ae'],
                ['(?i)([ti])um$', '\\1a'],
                ['(?i)(ndum)$', 'nda'],
                ['(?i)(gen|visc)us$', '\\1era'],
                ['(?i)(corp)us$', '\\1ora'],
                ['(?i)(octop|vir|alumn|bacill|cact|foc|fung)us$', '\\1i'],
                ['(?i)(loc|nucle|radi|stimul|styl|succub)us$', '\\1i'],
                ['(?i)(syllab|termin|tor)us$', '\\1i'],
                ['(?i)(us)$', '\\1es'],
                ['(?i)(matr|vert|ind)(ix|ex)$', '\\1ices'],
                ['(?i)([m|l])ouse$', '\\1ice'],
                ['(?i)(hive)$', '\\1s'],
                ['(?i)(s|t|x)is$', '\\1es'],
                # Inline flags must lead the pattern: Python 3.11+ raises
                # re.error for a flag placed after the anchor ('^(?i)...').
                ['(?i)^(ox)$', '\\1en'],
                ['(?i)(quiz)$', '\\1zes'],
                ['(?i)(?:([^f])fe|([aelor])f)$', '\\1\\2ves'],
                ['(?i)(([p|m]atriar|monar|stoma|con|epo)ch)$', '\\1s'],
                ['(?i)(x|ch|s|ss|sh|z)$', '\\1es'],
                ['(?i)([^aeiouy]o)$', '\\1es'],
                ['(?i)([^aeiouy]|qu)y$', '\\1ies'],
                ['(?i)$', 's']
            ],
            'irregular': {
                'albino': 'albinos',
                'armadillo': 'armadillos',
                'auto': 'autos',
                'cello': 'cellos',
                'chief': 'chiefs',
                'child': 'children',
                'combo': 'combos',
                'ego': 'egos',
                'foot': 'feet',
                'goose': 'geese',
                'halo': 'halos',
                'inferno': 'infernos',
                'lasso': 'lassos',
                'man': 'men',
                'memento': 'mementos',
                'memo': 'memos',
                'person': 'people',
                'piano': 'pianos',
                'photo': 'photos',
                'pro': 'pros',
                'safe': 'safes',
                'sex': 'sexes',
                'silo': 'silos',
                'solo': 'solos',
                'staff': 'staves',
                'taco': 'tacos',
                'tooth': 'teeth',
                'tuxedo': 'tuxedos',
                'typo': 'typos',
                'veto': 'vetos',
                'yo': 'yos'
            },
            # Despite the key name, these are the nouns that are returned
            # unchanged (plural identical to singular).
            'countable': [
                'aircraft',
                'cannon',
                'deer',
                'elk',
                'equipment',
                'fish',
                'glasses',
                'information',
                'money',
                'moose',
                'news',
                'pants',
                'pliers',
                'politics',
                'rice',
                'savings',
                'scissors',
                'series',
                'sheep',
                'species',
                'swine'
            ]
        }

        word = word.lower()

        # A cached entry answers for both its singular (key) and its
        # plural (value), hence the scan instead of a plain key lookup.
        for key, value in self.cache.items():
            if word == key or word == value:
                return value

        if word in rules['countable']:
            self.cache[word] = word
            return word

        for key, value in rules['irregular'].items():
            if word == key or word == value:
                self.cache[key] = value
                return value

        for pattern, replacement in rules['regular']:
            match = re.search(pattern, word, re.IGNORECASE)
            if not match:
                continue
            # Drop back-references to groups that took no part in the
            # match (e.g. one side of an alternation) so the substitution
            # also works on interpreters that reject unmatched groups.
            for index, group in enumerate(match.groups(), 1):
                if group is None:
                    replacement = replacement.replace('\\' + str(index), '')
            self.cache[word] = re.sub(pattern, replacement, word)
            return self.cache[word]

        # Defensive fallback only: the final catch-all rule matches every
        # string, so this is not expected to be reached.
        return Base.pluralize(self, word)