Python u Exemples, nameparser.util.u Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formating_removing_pieces_from_name_buckets(self):
     """Emptied name buckets must drop out of the formatted string."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")

     # With the nickname placeholder present, the nickname is quoted.
     name.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'"
     assert u(name) == "Rev John A. Kenneth Doe III 'Kenny'"

     # Without the placeholder, the nickname is not rendered at all.
     name.string_format = "{title} {first} {middle} {last} {suffix}"
     assert u(name) == "Rev John A. Kenneth Doe III"

     # Blank out one bucket at a time and check what is left.
     remaining_after_clearing = (
         ("middle", "Rev John Doe III"),
         ("suffix", "Rev John Doe"),
         ("title", "John Doe"),
     )
     for bucket, expected in remaining_after_clearing:
         setattr(name, bucket, "")
         assert u(name) == expected

Exemple #2

0

Afficher le fichier

Fichier : parser.py Projet : liormagen/python-nameparser

 def capitalize(self):
     """
     Guess the proper capitalization of a name that was entered entirely
     in upper case or entirely in lower case. Names entered in mixed case
     are left untouched.

     **Usage**

     .. doctest:: capitalize

         >>> name = HumanName('bob v. de la macdole-eisenhower phd')
         >>> name.capitalize()
         >>> str(name)
         'Bob V. de la MacDole-Eisenhower Ph.D.'
         >>> # Don't touch good names
         >>> name = HumanName('Shirley Maclaine')
         >>> name.capitalize()
         >>> str(name) 
         'Shirley Maclaine'

     """
     rendered = u(self)
     # Mixed case means the author already chose a capitalization.
     if rendered != rendered.upper() and rendered != rendered.lower():
         return
     # Each bucket is capitalized as one string, then split back into its
     # list form; suffixes are split on ', ' while the rest split on ' '.
     buckets = (
         ("title", " "),
         ("first", " "),
         ("middle", " "),
         ("last", " "),
         ("suffix", ", "),
     )
     for bucket, separator in buckets:
         capped = self.cap_piece(getattr(self, bucket))
         setattr(self, bucket + "_list", capped.split(separator))

Exemple #3

0

Afficher le fichier

Fichier : parser.py Projet : JoshBradshaw/python-nameparser

 def capitalize(self):
     """
     Attempt to fix the capitalization of a name typed in a single case
     (all upper or all lower). A name that already mixes cases is assumed
     to be correct and is not modified.

     **Usage**

     .. doctest:: capitalize

         >>> name = HumanName('bob v. de la macdole-eisenhower phd')
         >>> name.capitalize()
         >>> unicode(name)
         u'Bob V. de la MacDole-Eisenhower Ph.D.'
         >>> # Don't touch good names
         >>> name = HumanName('Shirley Maclaine')
         >>> name.capitalize()
         >>> unicode(name) 
         u'Shirley Maclaine'

     """
     as_text = u(self)
     if as_text != as_text.upper() and as_text != as_text.lower():
         # Mixed case: leave the name exactly as entered.
         return

     # Capitalize each bucket as a whole string, then split it back into
     # the list that backs it.
     capped_title = self.cap_piece(self.title)
     self.title_list = capped_title.split(' ')

     capped_first = self.cap_piece(self.first)
     self.first_list = capped_first.split(' ')

     capped_middle = self.cap_piece(self.middle)
     self.middle_list = capped_middle.split(' ')

     capped_last = self.cap_piece(self.last)
     self.last_list = capped_last.split(' ')

     # Suffixes are split on ', ' rather than on a single space.
     capped_suffix = self.cap_piece(self.suffix)
     self.suffix_list = capped_suffix.split(', ')

Exemple #4

0

Afficher le fichier

Fichier : parser.py Projet : derek73/python-nameparser

    def capitalize(self, force=False):
        """
        Guess the correct capitalization of a name entered in all upper or
        all lower case. Mixed-case names are skipped unless ``force=True``
        is passed, in which case capitalization is applied regardless.

        :param bool force: force capitalization of strings that include mixed case

        **Usage**

        .. doctest:: capitalize

            >>> name = HumanName('bob v. de la macdole-eisenhower phd')
            >>> name.capitalize()
            >>> str(name)
            'Bob V. de la MacDole-Eisenhower Ph.D.'
            >>> # Don't touch good names
            >>> name = HumanName('Shirley Maclaine')
            >>> name.capitalize()
            >>> str(name) 
            'Shirley Maclaine'
            >>> name.capitalize(force=True)
            >>> str(name) 
            'Shirley MacLaine'

        """
        rendered = u(self)
        # Proceed only when forced, or when the name is in a single case.
        if not (force or rendered == rendered.upper() or rendered == rendered.lower()):
            return
        # (bucket name, separator used to split the capitalized string)
        buckets = (
            ('title', ' '),
            ('first', ' '),
            ('middle', ' '),
            ('last', ' '),
            ('suffix', ', '),
        )
        for bucket, separator in buckets:
            capped = self.cap_piece(getattr(self, bucket), bucket)
            setattr(self, bucket + '_list', capped.split(separator))

Exemple #5

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_keep_emojis(self):
     """With the emoji regex set to False, emojis stay in the parsed name."""
     custom = Constants()
     custom.regexes.emoji = False

     parsed = HumanName("∫≜⩕ Smith😊", custom)

     assert parsed.first == "∫≜⩕"
     assert parsed.last == "Smith😊"
     assert u(parsed) == "∫≜⩕ Smith😊"

Exemple #6

0

Afficher le fichier

    def capitalize(self, force=False):
        """
        Try to guess the right capitalization for a name that was entered
        in a single case (all upper or all lower). Mixed-case names are
        left alone by default; pass ``force=True`` to capitalize them too.

        :param bool force: force capitalization of strings that include mixed case

        **Usage**

        .. doctest:: capitalize

            >>> name = HumanName('bob v. de la macdole-eisenhower phd')
            >>> name.capitalize()
            >>> str(name)
            'Bob V. de la MacDole-Eisenhower Ph.D.'
            >>> # Don't touch good names
            >>> name = HumanName('Shirley Maclaine')
            >>> name.capitalize()
            >>> str(name) 
            'Shirley Maclaine'
            >>> name.capitalize(force=True)
            >>> str(name) 
            'Shirley MacLaine'

        """
        as_text = u(self)
        if not (force or as_text == as_text.upper() or as_text == as_text.lower()):
            # Mixed case and not forced: keep the name as entered.
            return

        # Capitalize each bucket as one string, then split it back into
        # the list that backs it.
        capped_title = self.cap_piece(self.title)
        self.title_list = capped_title.split(' ')

        capped_first = self.cap_piece(self.first)
        self.first_list = capped_first.split(' ')

        capped_middle = self.cap_piece(self.middle)
        self.middle_list = capped_middle.split(' ')

        capped_last = self.cap_piece(self.last)
        self.last_list = capped_last.split(' ')

        # Suffixes are split on ', ' rather than on a single space.
        capped_suffix = self.cap_piece(self.suffix)
        self.suffix_list = capped_suffix.split(', ')

Exemple #7

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

    def test_formatting_constants_attribute(self):
        """
        Setting ``CONSTANTS.string_format`` changes how a name is rendered.

        ``CONSTANTS`` is a shared module-level object, so the original
        format is restored in a ``finally`` clause: a failing assertion (or
        an exception while building the name) must not leak the "TEST2"
        format into other tests.
        """
        from nameparser.config import CONSTANTS

        _orig = CONSTANTS.string_format
        CONSTANTS.string_format = "TEST2"
        try:
            hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")
            assert u(hn) == "TEST2"
        finally:
            # Always undo the global change, pass or fail.
            CONSTANTS.string_format = _orig

Exemple #8

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formating_removing_keys_from_format_string(self):
     """Keys dropped from the format string disappear from the output."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")

     # (format string, expected rendering), applied in order to one name.
     cases = (
         ("{title} {first} {middle} {last} {suffix} '{nickname}'",
          "Rev John A. Kenneth Doe III 'Kenny'"),
         ("{last}, {title} {first} {middle}, {suffix}",
          "Doe, Rev John A. Kenneth, III"),
         ("{last}, {title} {first} {middle}",
          "Doe, Rev John A. Kenneth"),
         ("{last}, {first} {middle}",
          "Doe, John A. Kenneth"),
         ("{last}, {first}",
          "Doe, John"),
         ("{first} {last}",
          "John Doe"),
     )
     for template, expected in cases:
         name.string_format = template
         assert u(name) == expected

Exemple #9

0

Afficher le fichier

Fichier : parser.py Projet : aikimark/python-nameparser

    def capitalize(self, force=None):
        """
        Guess the correct capitalization of a name entered in all upper or
        all lower case. Mixed-case names are not adjusted unless forced,
        either through the `force` argument or, when it is left as ``None``,
        through ``self.C.force_mixed_case_capitalization``.

        :param bool force: Forces capitalization of mixed case strings. This
            parameter overrides rules set within
            :py:class:`~nameparser.config.CONSTANTS`.

        **Usage**

        .. doctest:: capitalize

            >>> name = HumanName('bob v. de la macdole-eisenhower phd')
            >>> name.capitalize()
            >>> str(name)
            'Bob V. de la MacDole-Eisenhower Ph.D.'
            >>> # Don't touch good names
            >>> name = HumanName('Shirley Maclaine')
            >>> name.capitalize()
            >>> str(name)
            'Shirley Maclaine'
            >>> name.capitalize(force=True)
            >>> str(name)
            'Shirley MacLaine'

        """
        rendered = u(self)
        if force is None:
            # No explicit choice from the caller: use the configured value.
            force = self.C.force_mixed_case_capitalization

        # Proceed only when forced, or when the name is in a single case.
        if not (force or rendered == rendered.upper() or rendered == rendered.lower()):
            return

        # (bucket name, separator used to split the capitalized string)
        buckets = (
            ('title', ' '),
            ('first', ' '),
            ('middle', ' '),
            ('last', ' '),
            ('suffix', ', '),
        )
        for bucket, separator in buckets:
            capped = self.cap_piece(getattr(self, bucket), bucket)
            setattr(self, bucket + '_list', capped.split(separator))

Exemple #10

0

Afficher le fichier

Fichier : parser.py Projet : JoshBradshaw/python-nameparser

    def parse_full_name(self):
        """
        The main parse method for the parser. This method is run upon assignment to the
        :py:attr:`full_name` attribute or instantiation.

        Basic flow is to hand off to :py:func:`pre_process` to handle nicknames. It
        then splits on commas and chooses a code path depending on the number of commas.
        :py:func:`parse_pieces` then splits those parts on spaces and
        :py:func:`join_on_conjunctions` joins any pieces next to conjunctions.

        The method returns nothing: it resets and then fills the
        ``title_list``, ``first_list``, ``middle_list``, ``last_list``,
        ``suffix_list`` and ``nickname_list`` attributes and sets
        ``unparsable``.
        """
        
        # Start from a clean slate so a re-parse does not accumulate pieces.
        self.title_list = []
        self.first_list = []
        self.middle_list = []
        self.last_list = []
        self.suffix_list = []
        self.nickname_list = []
        self.unparsable = True
        
        # Convert non-text input to text using the instance's ENCODING.
        if not isinstance(self._full_name, text_type):
            self._full_name = u(self._full_name, self.ENCODING)
        
        self.pre_process()
        
        # collapse multiple spaces
        self._full_name = self.C.regexes.spaces.sub(" ", self._full_name.strip())
        
        # break up full_name by commas
        parts = [x.strip() for x in self._full_name.split(",")]
        
        log.debug("full_name: {0}".format(self._full_name))
        log.debug("parts: {0}".format(parts))
        
        if len(parts) == 1:
            
            # no commas, title first middle middle middle last suffix
            
            pieces = self.parse_pieces(parts)
            
            for i, piece in enumerate(pieces):
                # nxt is the piece after this one, or None on the last piece
                try:
                    nxt = pieces[i + 1]
                except IndexError:
                    nxt = None
                
                # title must have a next piece, unless it's just a title
                if self.is_title(piece) and (nxt or len(pieces) == 1):
                    self.title_list.append(piece)
                    continue
                # presumably self.first is derived from first_list, so this
                # is true only until a first name is stored -- TODO confirm
                if not self.first:
                    self.first_list.append(piece)
                    continue
                # second-to-last piece followed by a suffix: this piece is
                # the last name and the final piece is the suffix
                if (i == len(pieces) - 2) and self.is_suffix(nxt):
                    self.last_list.append(piece)
                    self.suffix_list.append(nxt)
                    break
                if not nxt:
                    self.last_list.append(piece)
                    continue
                
                # anything between the first and last names is a middle name
                self.middle_list.append(piece)
        else:
            if self.is_suffix(parts[1]):
                
                # suffix comma: title first middle last, suffix [, suffix]
                
                self.suffix_list += parts[1:]
                
                pieces = self.parse_pieces(parts[0].split(' '))
                log.debug("pieces: {0}".format(u(pieces)))
                
                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None

                    if self.is_title(piece) and (nxt or len(pieces) == 1):
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if not nxt:
                        self.last_list.append(piece)
                        continue
                    self.middle_list.append(piece)
            else:
                
                # lastname comma: last, title first middles[,] suffix [,suffix]
                pieces = self.parse_pieces(parts[1].split(' '), 1)
                
                log.debug("pieces: {0}".format(u(pieces)))
                
                # everything before the first comma is taken as the last name
                self.last_list.append(parts[0])
                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None
                    
                    if self.is_title(piece) and (nxt or len(pieces) == 1):
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if self.is_suffix(piece):
                        self.suffix_list.append(piece)
                        continue
                    self.middle_list.append(piece)
                # any parts after the second comma are treated as suffixes;
                # an empty third part is skipped along with what follows it
                try:
                    if parts[2]:
                        self.suffix_list += parts[2:]
                except IndexError:
                    pass
                
        # NOTE(review): len() never returns a negative number, so this
        # condition is always False and the "Unparsable" branch cannot run;
        # unparsable is therefore always set to False here. The intent was
        # possibly `len(self) == 0` -- confirm against __len__ before changing.
        if len(self) < 0:
            log.info("Unparsable full_name: " + self._full_name)
        else:
            self.unparsable = False
            self.post_process()

Exemple #11

0

Afficher le fichier

Fichier : parser.py Projet : liormagen/python-nameparser

    def parse_full_name(self):
        """
        The main parse method for the parser. This method is run upon assignment to the
        :py:attr:`full_name` attribute or instantiation.

        Basic flow is to hand off to :py:func:`pre_process` to handle nicknames. It
        then splits on commas and chooses a code path depending on the number of commas.
        :py:func:`parse_pieces` then splits those parts on spaces and
        :py:func:`join_on_conjunctions` joins any pieces next to conjunctions.

        The method returns nothing: it resets and then fills the
        ``title_list``, ``first_list``, ``middle_list``, ``last_list``,
        ``suffix_list`` and ``nickname_list`` attributes, sets ``unparsable``
        and finishes by calling :py:func:`post_process`.
        """

        # Start from a clean slate so a re-parse does not accumulate pieces.
        self.title_list = []
        self.first_list = []
        self.middle_list = []
        self.last_list = []
        self.suffix_list = []
        self.nickname_list = []
        self.unparsable = True

        self.pre_process()

        self._full_name = self.collapse_whitespace(self._full_name)

        # break up full_name by commas
        parts = [x.strip() for x in self._full_name.split(",")]

        log.debug("full_name: {0}".format(self._full_name))
        log.debug("parts: {0}".format(parts))

        if len(parts) == 1:

            # no commas, title first middle middle middle last suffix
            #            part[0]

            pieces = self.parse_pieces(parts)
            p_len = len(pieces)
            for i, piece in enumerate(pieces):
                # nxt is the piece after this one, or None on the last piece
                try:
                    nxt = pieces[i + 1]
                except IndexError:
                    nxt = None

                # title must have a next piece, unless it's just a title
                if self.is_title(piece) and (nxt
                                             or p_len == 1) and not self.first:
                    self.title_list.append(piece)
                    continue
                # presumably self.first is derived from first_list, so this
                # is true only until a first name is stored -- TODO confirm
                if not self.first:
                    self.first_list.append(piece)
                    continue
                if self.are_suffixes(pieces[i+1:]) or \
                        (
                            # if the next piece is the last piece and a roman numeral
                            # but this piece is not an initial
                            self.is_roman_numeral(nxt) and i == p_len - 2
                            and not self.is_an_initial(piece)
                        ):
                    # this piece is the last name; everything after it is suffix
                    self.last_list.append(piece)
                    self.suffix_list += pieces[i + 1:]
                    break
                if not nxt:
                    self.last_list.append(piece)
                    continue

                # anything between the first and last names is a middle name
                self.middle_list.append(piece)
        else:
            # if all the end parts are suffixes and there is more than one piece in
            # the first part. (Suffixes will never appear after last names only, and
            # allows potential first names to be in suffixes, e.g. "Johnson, Bart"
            if self.are_suffixes(
                    parts[1].split(' ')) and len(parts[0].split(' ')) > 1:

                # suffix comma: title first middle last [suffix], suffix [suffix] [, suffix]
                #               parts[0],                         parts[1:...]

                self.suffix_list += parts[1:]
                pieces = self.parse_pieces(parts[0].split(' '))
                log.debug("pieces: {0}".format(u(pieces)))
                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None

                    if self.is_title(piece) and (nxt or len(pieces)
                                                 == 1) and not self.first:
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if self.are_suffixes(pieces[i + 1:]):
                        self.last_list.append(piece)
                        # suffixes found before the comma go ahead of the
                        # comma-separated ones added above
                        self.suffix_list = pieces[i + 1:] + self.suffix_list
                        break
                    if not nxt:
                        self.last_list.append(piece)
                        continue
                    self.middle_list.append(piece)
            else:

                # lastname comma: last [suffix], title first middles[,] suffix [,suffix]
                #                 parts[0],      parts[1],              parts[2:...]
                pieces = self.parse_pieces(parts[1].split(' '), 1)

                log.debug("pieces: {0}".format(u(pieces)))

                # lastname part may have suffixes in it
                lastname_pieces = self.parse_pieces(parts[0].split(' '), 1)
                for piece in lastname_pieces:
                    # the first one is always a last name, even if it look like a suffix
                    if self.is_suffix(piece) and len(self.last_list) > 0:
                        self.suffix_list.append(piece)
                    else:
                        self.last_list.append(piece)

                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None

                    if self.is_title(piece) and (nxt or len(pieces)
                                                 == 1) and not self.first:
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if self.is_suffix(piece):
                        self.suffix_list.append(piece)
                        continue
                    self.middle_list.append(piece)
                # any parts after the second comma are treated as suffixes;
                # an empty third part is skipped along with what follows it
                try:
                    if parts[2]:
                        self.suffix_list += parts[2:]
                except IndexError:
                    pass

        # NOTE(review): len() never returns a negative number, so this
        # condition is always False and the "Unparsable" branch cannot run;
        # unparsable is therefore always set to False here. The intent was
        # possibly `len(self) == 0` -- confirm against __len__ before changing.
        if len(self) < 0:
            log.info("Unparsable: \"{}\" ".format(self.original))
        else:
            self.unparsable = False
        self.post_process()

Exemple #12

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formating_of_nicknames_with_parenthesis(self):
     """Parentheses around an empty nickname are dropped from the output."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")
     name.string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"

     # While the nickname is set it is rendered inside the parentheses.
     rendered_with_nickname = u(name)
     assert rendered_with_nickname == "Rev John A. Kenneth Doe III (Kenny)"

     # Once it is cleared, the parentheses go away with it.
     name.nickname = ""
     rendered_without_nickname = u(name)
     assert rendered_without_nickname == "Rev John A. Kenneth Doe III"

Exemple #13

0

Afficher le fichier

 def __hash__(self):
     """Hash the lower-cased text form of the instance as given by ``u()``."""
     lowered = u(self).lower()
     return hash(lowered)

Exemple #14

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formating_of_nicknames_with_single_quotes(self):
     """Single quotes around an empty nickname are dropped from the output."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")
     name.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'"

     # While the nickname is set it is rendered inside the quotes.
     rendered_with_nickname = u(name)
     assert rendered_with_nickname == "Rev John A. Kenneth Doe III 'Kenny'"

     # Once it is cleared, the quotes go away with it.
     name.nickname = ""
     rendered_without_nickname = u(name)
     assert rendered_without_nickname == "Rev John A. Kenneth Doe III"

Exemple #15

0

Afficher le fichier

Fichier : parser.py Projet : derek73/python-nameparser

 def __eq__(self, other):
     """
     Case-insensitive equality: an instance equals any object whose
     lower-cased ``u()`` text matches its own.
     """
     mine = u(self).lower()
     theirs = u(other).lower()
     return mine == theirs

Exemple #16

0

Afficher le fichier

Fichier : parser.py Projet : derek73/python-nameparser

 def __ne__(self, other):
     """Case-insensitive inequality on the lower-cased ``u()`` text."""
     mine = u(self).lower()
     theirs = u(other).lower()
     return mine != theirs

Exemple #17

0

Afficher le fichier

Fichier : parser.py Projet : derek73/python-nameparser

    def parse_full_name(self):
        """
        
        The main parse method for the parser. This method is run upon
        assignment to the :py:attr:`full_name` attribute or instantiation.

        Basic flow is to hand off to :py:func:`pre_process` to handle
        nicknames. It then splits on commas and chooses a code path depending
        on the number of commas.
        
        :py:func:`parse_pieces` then splits those parts on spaces and
        :py:func:`join_on_conjunctions` joins any pieces next to conjunctions.

        The method returns nothing: it resets and then fills the
        ``title_list``, ``first_list``, ``middle_list``, ``last_list``,
        ``suffix_list`` and ``nickname_list`` attributes, sets ``unparsable``
        and finishes by calling :py:func:`post_process`.
        """
        
        # Start from a clean slate so a re-parse does not accumulate pieces.
        self.title_list = []
        self.first_list = []
        self.middle_list = []
        self.last_list = []
        self.suffix_list = []
        self.nickname_list = []
        self.unparsable = True
        
        
        self.pre_process()
        
        self._full_name = self.collapse_whitespace(self._full_name)
        
        # break up full_name by commas
        parts = [x.strip() for x in self._full_name.split(",")]
        
        log.debug("full_name: %s", self._full_name)
        log.debug("parts: %s", parts)
        
        if len(parts) == 1:
            
            # no commas, title first middle middle middle last suffix
            #            part[0]
            
            pieces = self.parse_pieces(parts)
            p_len = len(pieces)
            for i, piece in enumerate(pieces):
                # nxt is the piece after this one, or None on the last piece
                try:
                    nxt = pieces[i + 1]
                except IndexError:
                    nxt = None
                
                # title must have a next piece, unless it's just a title
                if self.is_title(piece) \
                        and (nxt or p_len == 1) \
                        and not self.first:
                    self.title_list.append(piece)
                    continue
                # presumably self.first is derived from first_list, so this
                # is true only until a first name is stored -- TODO confirm
                if not self.first:
                    # a single piece alongside a nickname is stored as the
                    # last name rather than the first name
                    if p_len == 1 and self.nickname:
                        self.last_list.append(piece)
                        continue
                    self.first_list.append(piece)
                    continue
                if self.are_suffixes(pieces[i+1:]) or \
                        ( 
                            # if the next piece is the last piece and a roman
                            # numeral but this piece is not an initial
                            self.is_roman_numeral(nxt) and i == p_len - 2 
                            and not self.is_an_initial(piece)
                        ):
                    # this piece is the last name; everything after it is suffix
                    self.last_list.append(piece)
                    self.suffix_list += pieces[i+1:]
                    break
                if not nxt:
                    self.last_list.append(piece)
                    continue
                
                # anything between the first and last names is a middle name
                self.middle_list.append(piece)
        else:
            # if all the end parts are suffixes and there is more than one piece
            # in the first part. (Suffixes will never appear after last names
            # only, and allows potential first names to be in suffixes, e.g.
            # "Johnson, Bart"
            if self.are_suffixes(parts[1].split(' ')) \
                    and len(parts[0].split(' ')) > 1:
                
                # suffix comma: 
                # title first middle last [suffix], suffix [suffix] [, suffix]
                #               parts[0],          parts[1:...]
               
                
                self.suffix_list += parts[1:]
                pieces = self.parse_pieces(parts[0].split(' '))
                log.debug("pieces: %s", u(pieces))
                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None

                    if self.is_title(piece) \
                            and (nxt or len(pieces) == 1) \
                            and not self.first:
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if self.are_suffixes(pieces[i+1:]):
                        self.last_list.append(piece)
                        # suffixes found before the comma go ahead of the
                        # comma-separated ones added above
                        self.suffix_list = pieces[i+1:] + self.suffix_list
                        break
                    if not nxt:
                        self.last_list.append(piece)
                        continue
                    self.middle_list.append(piece)
            else:
                
                # lastname comma: 
                # last [suffix], title first middles[,] suffix [,suffix]
                #      parts[0],      parts[1],              parts[2:...]
                pieces = self.parse_pieces(parts[1].split(' '), 1)
                
                log.debug("pieces: %s", u(pieces))
                
                # lastname part may have suffixes in it
                lastname_pieces = self.parse_pieces(parts[0].split(' '), 1)
                for piece in lastname_pieces:
                    # the first one is always a last name, even if it looks like
                    # a suffix
                    if self.is_suffix(piece) and len(self.last_list) > 0:
                        self.suffix_list.append(piece)
                    else:
                        self.last_list.append(piece)
                
                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None
                    
                    if self.is_title(piece) \
                            and (nxt or len(pieces) == 1) \
                            and not self.first:
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if self.is_suffix(piece):
                        self.suffix_list.append(piece)
                        continue
                    self.middle_list.append(piece)
                # any parts after the second comma are treated as suffixes;
                # an empty third part is skipped along with what follows it
                try:
                    if parts[2]:
                        self.suffix_list += parts[2:]
                except IndexError:
                    pass
                
        # NOTE(review): len() never returns a negative number, so this
        # condition is always False and the "Unparsable" branch cannot run;
        # unparsable is therefore always set to False here. The intent was
        # possibly `len(self) == 0` -- confirm against __len__ before changing.
        if len(self) < 0:
            log.info("Unparsable: \"%s\" ", self.original)
        else:
            self.unparsable = False
        self.post_process()

Exemple #18

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_quote_nickname_formating(self):
     """A quoted nickname placeholder renders in both name orderings."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")

     # (format string, expected rendering), applied in order to one name.
     cases = (
         ("{title} {first} {middle} {last} {suffix} '{nickname}'",
          "Rev John A. Kenneth Doe III 'Kenny'"),
         ("{last}, {title} {first} {middle}, {suffix} '{nickname}'",
          "Doe, Rev John A. Kenneth, III 'Kenny'"),
     )
     for template, expected in cases:
         name.string_format = template
         assert u(name) == expected

Exemple #19

0

Afficher le fichier

    def parse_full_name(self):
        """
        The main parse method for the parser. This method is run upon assignment to the
        :py:attr:`full_name` attribute or instantiation.

        Basic flow is to hand off to :py:func:`pre_process` to handle nicknames. It
        then splits on commas and chooses a code path depending on the number of commas.
        :py:func:`parse_pieces` then splits those parts on spaces and
        :py:func:`join_on_conjunctions` joins any pieces next to conjunctions.

        The method returns nothing: it resets and then fills the
        ``title_list``, ``first_list``, ``middle_list``, ``last_list``,
        ``suffix_list`` and ``nickname_list`` attributes and sets
        ``unparsable``.
        """

        # Start from a clean slate so a re-parse does not accumulate pieces.
        self.title_list = []
        self.first_list = []
        self.middle_list = []
        self.last_list = []
        self.suffix_list = []
        self.nickname_list = []
        self.unparsable = True

        # Convert non-text input to text using the instance's ENCODING.
        if not isinstance(self._full_name, text_type):
            self._full_name = u(self._full_name, self.ENCODING)

        self.pre_process()

        # collapse multiple spaces
        self._full_name = self.C.regexes.spaces.sub(" ",
                                                    self._full_name.strip())

        # break up full_name by commas
        parts = [x.strip() for x in self._full_name.split(",")]

        log.debug("full_name: {0}".format(self._full_name))
        log.debug("parts: {0}".format(parts))

        if len(parts) == 1:

            # no commas, title first middle middle middle last suffix

            pieces = self.parse_pieces(parts)

            for i, piece in enumerate(pieces):
                # nxt is the piece after this one, or None on the last piece
                try:
                    nxt = pieces[i + 1]
                except IndexError:
                    nxt = None

                # title must have a next piece, unless it's just a title
                if self.is_title(piece) and (nxt or len(pieces) == 1):
                    self.title_list.append(piece)
                    continue
                # presumably self.first is derived from first_list, so this
                # is true only until a first name is stored -- TODO confirm
                if not self.first:
                    self.first_list.append(piece)
                    continue
                # second-to-last piece followed by a suffix: this piece is
                # the last name and the final piece is the suffix
                if (i == len(pieces) - 2) and self.is_suffix(nxt):
                    self.last_list.append(piece)
                    self.suffix_list.append(nxt)
                    break
                if not nxt:
                    self.last_list.append(piece)
                    continue

                # anything between the first and last names is a middle name
                self.middle_list.append(piece)
        else:
            if self.is_suffix(parts[1]):

                # suffix comma: title first middle last, suffix [, suffix]

                self.suffix_list += parts[1:]

                pieces = self.parse_pieces(parts[0].split(' '))
                log.debug("pieces: {0}".format(u(pieces)))

                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None

                    if self.is_title(piece) and (nxt or len(pieces) == 1):
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if not nxt:
                        self.last_list.append(piece)
                        continue
                    self.middle_list.append(piece)
            else:

                # lastname comma: last, title first middles[,] suffix [,suffix]
                pieces = self.parse_pieces(parts[1].split(' '), 1)

                log.debug("pieces: {0}".format(u(pieces)))

                # everything before the first comma is taken as the last name
                self.last_list.append(parts[0])
                for i, piece in enumerate(pieces):
                    try:
                        nxt = pieces[i + 1]
                    except IndexError:
                        nxt = None

                    if self.is_title(piece) and (nxt or len(pieces) == 1):
                        self.title_list.append(piece)
                        continue
                    if not self.first:
                        self.first_list.append(piece)
                        continue
                    if self.is_suffix(piece):
                        self.suffix_list.append(piece)
                        continue
                    self.middle_list.append(piece)
                # any parts after the second comma are treated as suffixes;
                # an empty third part is skipped along with what follows it
                try:
                    if parts[2]:
                        self.suffix_list += parts[2:]
                except IndexError:
                    pass

        # NOTE(review): len() never returns a negative number, so this
        # condition is always False and the "Unparsable" branch cannot run;
        # unparsable is therefore always set to False here. The intent was
        # possibly `len(self) == 0` -- confirm against __len__ before changing.
        if len(self) < 0:
            log.info("Unparsable full_name: " + self._full_name)
        else:
            self.unparsable = False
            self.post_process()

Exemple #20

0

Afficher le fichier

Fichier : parser.py Projet : liormagen/python-nameparser

 def __eq__(self, other):
     """
     Compare case-insensitively: the two objects are equal when the
     lower-cased ``u()`` text of each is identical.
     """
     own_text = u(self).lower()
     other_text = u(other).lower()
     return own_text == other_text

Exemple #21

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formating_of_nicknames_with_double_quotes(self):
     """Double quotes around an empty nickname are dropped from the output."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")
     name.string_format = '{title} {first} {middle} {last} {suffix} "{nickname}"'

     # While the nickname is set it is rendered inside the quotes.
     rendered_with_nickname = u(name)
     assert rendered_with_nickname == 'Rev John A. Kenneth Doe III "Kenny"'

     # Once it is cleared, the quotes go away with it.
     name.nickname = ""
     rendered_without_nickname = u(name)
     assert rendered_without_nickname == "Rev John A. Kenneth Doe III"

Exemple #22

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_keep_non_emojis(self):
     """Non-emoji symbols stay in the name while the emoji is removed."""
     parsed = HumanName("∫≜⩕ Smith 😊")

     first_name = parsed.first
     assert first_name == "∫≜⩕"

     last_name = parsed.last
     assert last_name == "Smith"

     rendered = u(parsed)
     assert rendered == "∫≜⩕ Smith"

Exemple #23

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_remove_emojis(self):
     """A trailing emoji is removed from the parsed name."""
     parsed = HumanName("Sam Smith 😊")

     first_name = parsed.first
     assert first_name == "Sam"

     last_name = parsed.last
     assert last_name == "Smith"

     rendered = u(parsed)
     assert rendered == "Sam Smith"

Exemple #24

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formating_of_nicknames_in_middle(self):
     """A nickname placeholder mid-format leaves no gap once it is emptied."""
     name = HumanName("Rev John A. Kenneth Doe III (Kenny)")
     name.string_format = "{title} {first} ({nickname}) {middle} {last} {suffix}"

     # While the nickname is set it is rendered right after the first name.
     rendered_with_nickname = u(name)
     assert rendered_with_nickname == "Rev John (Kenny) A. Kenneth Doe III"

     # Once it is cleared, the parentheses go away with it.
     name.nickname = ""
     rendered_without_nickname = u(name)
     assert rendered_without_nickname == "Rev John A. Kenneth Doe III"

Exemple #25

0

Afficher le fichier

Fichier : parser.py Projet : liormagen/python-nameparser

 def __ne__(self, other):
     """Inverse of the case-insensitive comparison of ``u()`` text."""
     own_text = u(self).lower()
     other_text = u(other).lower()
     return own_text != other_text

Exemple #26

0

Afficher le fichier

Fichier : names_test.py Projet : vaneseltine/python-nameparser

 def test_formatting_init_argument(self):
     hn = HumanName("Rev John A. Kenneth Doe III (Kenny)",
                    string_format="TEST1")
     assert u(hn) == "TEST1"