def parse_name(self, name_raw):
        """
        Parse a raw name, e.g. "Corbato, F.J.", into last, first and middle name.

        >>> p=Person(name_raw='Corbato, F.J.')
        >>> p.last, p.first, p.middle
        ('Corbató', 'F', 'J')

        :param name_raw: raw name string to parse
        :return: tuple ``(last, first, middle)``
        """

        # Fix names like "Verzuh M., F.", where the middle initial comes after
        # the last name -> it should be "Verzuh, F. M."
        # Raw strings keep the regex escapes (``\.``) from being treated as
        # (invalid) string escapes.
        match = re.match(r'([A-Z][a-z]+) ([A-Z])\., ([A-Z][a-z]*)\.*', name_raw)
        if match:
            last, middle_initial, first = match.groups()
            name_raw = f'{last}, {first}. {middle_initial}.'

        name = HumanName(name_raw)
        # If first and middle initials have periods but no spaces, the parser
        # keeps them together in ``first`` -> separate them, e.g. "R.K. Teague"
        if re.match(r'[a-zA-Z]\.[a-zA-Z]\.', name.first):
            name.middle = name.first[2]
            name.first = name.first[0]

        name.last = name.last.capitalize()
        name.first = name.first.strip('.').capitalize()
        name.middle = name.middle.strip('.').capitalize()

        # Known misspellings of last names and their canonical spelling.
        last_name_replacements = [('Corbato', 'Corbató'),
                                  ('Corbatò', 'Corbató'), ('Verguh', 'Verzuh')]
        for wrong, right in last_name_replacements:
            name.last = name.last.replace(wrong, right)

        return name.last, name.first, name.middle
Example #2
0
def normalize_name(first_name, last_name):
    """Return ``(first, last)`` after HumanName.capitalize() has been applied."""
    parsed = HumanName()
    parsed.first, parsed.last = first_name, last_name
    # capitalize() works in place on the HumanName instance.
    parsed.capitalize()
    return parsed.first, parsed.last
Example #3
0
 def sort_contributor(self, c: ParsedSegment, default_type=None):
     '''
     File contributor ``c`` under its agent type in ``self.contributors``.

     The type is 'person' when the parsed last name is already a key of
     ``self.contributors['person']``; otherwise any non-person type that
     already holds an entry with the same initials and the same name wins.
     If neither applies, ``default_type`` is used, and when that is None the
     user is asked through ``multi_choice``.

     Persons are stored as ``{last: {first_initial: [segments]}}``; every
     other type as ``{initials: [segments]}``.
     '''
     parsed = HumanName(c.name)
     abbreviation = ''.join(rgx.abbr.findall(c.name))

     agent_type = default_type
     if parsed.last in self.contributors['person']:
         agent_type = 'person'
     else:
         for kind, records in self.contributors.items():
             if kind == 'person' or abbreviation not in records:
                 continue
             if any([entry.name == c.name for entry in records[abbreviation]]):
                 # No break: the last matching type wins.
                 agent_type = kind

     if agent_type is None:
         # Nothing known about this contributor: ask interactively.
         picked = multi_choice(
             'What type of contributor is "{0}"?'.format(c.name),
             self.contributors.keys())
         agent_type = list(self.contributors.keys())[picked]

     if agent_type != 'person':
         bucket = self.contributors[agent_type]
         bucket[abbreviation] = bucket.get(abbreviation, []) + [c]
         return

     # Persons need both a last and a first name; '?' marks a missing first.
     if parsed.last == '':
         parsed.last, parsed.first = parsed.first, '?'
     if parsed.first == '':
         parsed.first = '?'
     c.name = parsed

     by_family = self.contributors[agent_type].get(parsed.last, {})
     first_initial = parsed.first[0]
     by_family[first_initial] = by_family.get(first_initial, []) + [c]
     self.contributors[agent_type][parsed.last] = by_family
Example #4
0
def lookup_judge_by_last_name(
    last_name: str,
    court_id: str,
    event_date: Optional[date] = None,
) -> Optional[Person]:
    """Find a judge from only a last name, plus a court and optional date.

    Wraps the last name in an otherwise empty HumanName and delegates to
    ``lookup_judge_by_full_name``.
    """
    last_only = HumanName()
    last_only.last = last_name
    judge = lookup_judge_by_full_name(last_only, court_id, event_date)
    return judge
Example #5
0
 def test_assignment_to_attribute(self):
     """A string assigned to each name part is returned unchanged on read."""
     hn = HumanName("John A. Kenneth Doe, Jr.")
     # (attribute, value) pairs, checked in the same order they are assigned.
     assignments = (
         ("last", "de la Vega"),
         ("title", "test"),
         ("first", "test"),
         ("middle", "test"),
         ("suffix", "test"),
     )
     for attr, value in assignments:
         setattr(hn, attr, value)
         self.m(getattr(hn, attr), value, hn)
 def test_assign_list_to_attribute(self):
     """A list assigned to a name part reads back as a space-joined string."""
     hn = HumanName("John A. Kenneth Doe, Jr.")
     # (attribute, assigned list, expected string), in assignment order.
     cases = (
         ("title", ["test1", "test2"], "test1 test2"),
         ("first", ["test3", "test4"], "test3 test4"),
         ("middle", ["test5", "test6", "test7"], "test5 test6 test7"),
         ("last", ["test8", "test9", "test10"], "test8 test9 test10"),
         ("suffix", ["test"], "test"),
     )
     for attr, pieces, expected in cases:
         setattr(hn, attr, pieces)
         assert getattr(hn, attr) == expected
Example #7
0
 def _massage_measure_donor_name(self, name_string):
     """
     Return a normalized "First [Middle.] Last [Suffix]" string for a donor.

     The raw string is parsed with HumanName, first and last names are
     title-cased, and a middle name is title-cased and given exactly one
     trailing period. A few names known to parse badly are then corrected by
     hand before the output string is assembled.

     :param name_string: raw donor name
     :return: the formatted name string
     """
     name = HumanName(name_string)
     name.first = name.first.title()
     name.last = name.last.title()
     if name.middle:
         # Drop any periods first so the result ends in exactly one.
         name.middle = name.middle.replace(".", "")
         name.middle = "%s." % (name.middle.title())

     # Hand corrections for names the parser gets wrong. Each check compares
     # the whole (already massaged) HumanName against a literal string.
     if name == "JR. Munger CHARLES T.":
         name.first = "Charles"
         name.middle = "T."
         name.last = "Munger"
         name.suffix = "Jr."
     if name == "M. Quinn. Delaney":
         name.first = "M."
         name.middle = "Quinn"
         name.last = "Delaney"
         name.suffix = None
     if name == "Robert Alan. Eustace":
         name.first = "Robert"
         name.middle = "Alan"
         name.last = "Eustace"
         name.suffix = None
     if name == "Susie Tompkins. Buell":
         name.first = "Susie"
         name.middle = "Tompkins"
         name.last = "Buell"
         name.suffix = None

     # One elif chain: previously the middle+suffix result was immediately
     # overwritten by the middle-only branch, silently dropping the suffix.
     if name.middle and name.suffix:
         output = "%s %s %s %s" % (name.first, name.middle, name.last,
                                   name.suffix)
     elif name.middle:
         output = "%s %s %s" % (name.first, name.middle, name.last)
     elif name.suffix:
         output = "%s %s %s" % (name.first, name.last, name.suffix)
     else:
         output = "%s %s" % (name.first, name.last)
     return output
Example #8
0
 def from_parts(cls,
                first=None,
                last=None,
                middle=None,
                suffix=None,
                title=None):
     """Build a ParsedName from individually supplied name parts.

     Every part is assigned onto a fresh, empty HumanName (parts left at
     their default are assigned as None), which is then wrapped in ParsedName.
     """
     human_name = HumanName()
     # Assigned in this fixed order: first, middle, last, suffix, title.
     parts = (
         ('first', first),
         ('middle', middle),
         ('last', last),
         ('suffix', suffix),
         ('title', title),
     )
     for attr, value in parts:
         setattr(human_name, attr, value)
     return ParsedName(human_name)
 def _massage_measure_donor_name(self, name_string):
     """
     Return a normalized "First [Middle.] Last [Suffix]" string for a donor.

     The raw string is parsed with HumanName, first and last names are
     title-cased, and a middle name is title-cased and given exactly one
     trailing period. A few names known to parse badly are then corrected by
     hand before the output string is assembled.

     :param name_string: raw donor name
     :return: the formatted name string
     """
     name = HumanName(name_string)
     name.first = name.first.title()
     name.last = name.last.title()
     if name.middle:
         # Drop any periods first so the result ends in exactly one.
         name.middle = name.middle.replace(".", "")
         name.middle = "%s." % (name.middle.title())

     # Hand corrections for names the parser gets wrong. Each check compares
     # the whole (already massaged) HumanName against a literal string.
     if name == "JR. Munger CHARLES T.":
         name.first = "Charles"
         name.middle = "T."
         name.last = "Munger"
         name.suffix = "Jr."
     if name == "M. Quinn. Delaney":
         name.first = "M."
         name.middle = "Quinn"
         name.last = "Delaney"
         name.suffix = None
     if name == "Robert Alan. Eustace":
         name.first = "Robert"
         name.middle = "Alan"
         name.last = "Eustace"
         name.suffix = None
     if name == "Susie Tompkins. Buell":
         name.first = "Susie"
         name.middle = "Tompkins"
         name.last = "Buell"
         name.suffix = None

     # One elif chain: previously the middle+suffix result was immediately
     # overwritten by the middle-only branch, silently dropping the suffix.
     if name.middle and name.suffix:
         output = "%s %s %s %s" % (name.first, name.middle, name.last, name.suffix)
     elif name.middle:
         output = "%s %s %s" % (name.first, name.middle, name.last)
     elif name.suffix:
         output = "%s %s %s" % (name.first, name.last, name.suffix)
     else:
         output = "%s %s" % (name.first, name.last)
     return output
Example #10
0
def lookup_judges_by_last_name_list(
    last_names: List[str],
    court_id: str,
    event_date: Optional[date] = None,
) -> List[Person]:
    """Resolve several last names to judges for one court and optional date.

    Each last name is looked up on its own through
    ``lookup_judge_by_full_name``; names with no match are left out, so the
    result may be shorter than ``last_names``.
    """
    judges = []
    for surname in last_names:
        last_only = HumanName()
        last_only.last = surname
        judge = lookup_judge_by_full_name(last_only, court_id, event_date)
        if judge is None:
            continue
        judges.append(judge)
    return judges
Example #11
0
    def _massage_payload(self, payload):
        for k, v in payload.items():
            if pd.isnull(v) or not v:
                # Replace nan or None with empty string.
                payload[k] = ""

        # Ensure names aren't all caps or all lowercase.
        if payload.get("firstname") and payload.get("lastname"):
            name = HumanName()
            name.first = payload["firstname"]
            name.last = payload["lastname"]
            name.capitalize()
            payload["firstname"] = name.first
            payload["lastname"] = name.last
 def test_assignment_to_attribute(self):
     """String assignment round-trips; nested lists and dicts raise TypeError."""
     hn = HumanName("John A. Kenneth Doe, Jr.")
     # (attribute, value) pairs, checked in the same order they are assigned.
     assignments = (
         ("last", "de la Vega"),
         ("title", "test"),
         ("first", "test"),
         ("middle", "test"),
         ("suffix", "test"),
     )
     for attr, value in assignments:
         setattr(hn, attr, value)
         assert getattr(hn, attr) == value

     # Values that are neither a string nor a flat list must be rejected.
     for bad_value in ([["test"]], {"test": "test"}):
         with pytest.raises(TypeError):
             hn.suffix = bad_value
Example #13
0
    def HumanNameFmXML(self, ell):
        """Build a HumanName from the child elements of ``ell``.

        Children tagged First, Middle, Last, Title, Suffix or NickName have
        their text copied to the matching HumanName attribute; children with
        any other tag are ignored.
        """
        # XML tag -> HumanName attribute that receives the element text.
        attr_for_tag = {
            'First': 'first',
            'Middle': 'middle',
            'Last': 'last',
            'Title': 'title',
            'Suffix': 'suffix',
            'NickName': 'nickname',
        }
        hn = HumanName()
        for el in ell:
            attr = attr_for_tag.get(el.tag)
            if attr is not None:
                setattr(hn, attr, el.text)

        return hn
Example #14
0
    def person_name_from_xml(self, ell):
        '''Create a person name (HumanName) from an XML element.

        The text of each recognized child element (First, Middle, Last,
        Title, Suffix, NickName) is copied to the corresponding HumanName
        attribute. Unrecognized children are skipped.
        '''
        recognized = (
            ('First', 'first'),
            ('Middle', 'middle'),
            ('Last', 'last'),
            ('Title', 'title'),
            ('Suffix', 'suffix'),
            ('NickName', 'nickname'),
        )
        hname = HumanName()
        for elm in ell:
            for tag, attribute in recognized:
                if elm.tag == tag:
                    setattr(hname, attribute, elm.text)
                    break

        return hname
    def parse_raw_name(name_raw: str) -> (str, str, str, set):
        """
        Parses a (usually messy) raw name and returns
        first, middle, last names and a set of extracted positions

        Positions/affiliations are pulled out of the raw string before it is
        handed to HumanName: text after a single " - ", text in parentheses,
        and trailing text matching one of the known institution patterns. A
        non-empty parsed suffix is also added to the positions.

        :param name_raw: str
        :return: str, str, str, set (first, middle, last, positions)


        Parses name and returns as human name
        >>> n = Person('TEAGUE CE JR')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Teague', 'C', 'E', 'JR')

        >>> n = Person('teague, ce jr')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Teague', 'C', 'E', 'JR')


        >>> n = Person('Teague, Claude Edward, Jr., Ph.D. ')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Teague', 'Claude', 'Edward', 'JR., PH.D.')

        >>> n = Person('Teague, J - BAT')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Teague', 'J', '', 'BAT')

        >>> n = Person('BAKER, T E - NATIONAL ASSOCIATION OF ATTORNEYS GENERAL')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Baker', 'T', 'E', 'NATIONAL ASSOCIATION OF ATTORNEYS GENERAL')

        >>> n = Person('BAKER-cj')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Baker', 'C', 'J', '')

        JR and SR are by default recognized as titles -> turn off through CONSTANTS.
        >>> n = Person('Baker, JR')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Baker', 'J', 'R', '')

        >>> n = Person('DUNN WL #')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Dunn', 'W', 'L', '')

        >>> n = Person('Dunn, W. L.')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Dunn', 'W', 'L', '')

        >>> n = Person('TEMKO SL, COVINGTON AND BURLING')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Temko', 'S', 'L', 'COVINGTON AND BURLING')

        >>> n = Person('Temko, Stanley L [Privlog:] TEMKO,SL')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Temko', 'Stanley', 'L', '')

        >>> n = Person('Temko-SL, Covington & Burling')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Temko', 'S', 'L', 'COVINGTON & BURLING')

        >>> n = Person('HENSON, A. (AMERICAN SENIOR VICE PRESIDENT AND GENERAL COUNSEL)')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Henson', 'A', '', 'AMERICAN SENIOR VICE PRESIDENT AND GENERAL COUNSEL')

        >>> n = Person('HENSON, A. (CHADBOURNE, PARKE, WHITESIDE & WOLFF, AMERICAN OUTSIDE COUNSEL) (HANDWRITTEN NOTES)')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Henson', 'A', '', 'CHADBOURNE, PARKE, WHITESIDE & WOLFF, AMERICAN OUTSIDE COUNSEL HANDWRITTEN NOTES')

        >>> n = Person('Holtzman, A.,  Murray, J. ,  Henson, A. ,  Pepples, E. ,  Stevens, A. ,  Witt, S.')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Holtzman', 'A', '', '')

        >>> n = Person('Holtz, Jacob, Jacob & Medinger')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Holtz', 'Jacob', '', 'JACOB & MEDINGER')

        # This one breaks. But I don't think it can be avoided.
        >>> n = Person('Holtz, Jacob Alexander, Jacob & Medinger')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Holtz', '', '', 'JACOB ALEXANDER, JACOB & MEDINGER')

        >>> n = Person('PROCTOR DF, JOHNS HOPKINS SCHOOL OF HYGIENE')
        >>> n.last, n.first, n.middle, " ".join(n.positions).upper()
        ('Proctor', 'D', 'F', 'JOHNS HOPKINS SCHOOL OF HYGIENE')

        """

        # remove privlog info, e.g. 'Temko, Stanley L [Privlog:] TEMKO,SL'. It confuses
        # the name parser
        privlog_tag = '[Privlog:]'
        privlog_id = name_raw.find(privlog_tag)
        if privlog_id == 0:
            # Tag leads the string: drop only the tag and keep what follows.
            # (The previous slice ``name_raw[0:]`` left the tag in place.)
            name_raw = name_raw[len(privlog_tag):]
        elif privlog_id > 0:
            # Tag follows the name: keep only the part before it.
            name_raw = name_raw[:privlog_id]

        # position is often attached with a dash, e.g. 'BAKER, T E - NATIONAL ASSOCIATION OF ATTORNEYS'
        # Only split when there is exactly one ' - ' separator.
        dash_parts = name_raw.split(' - ')
        if len(dash_parts) == 2:
            name_raw, extracted_position = dash_parts
            extracted_positions = {extracted_position.strip()}
        else:
            extracted_positions = set()

        # extract positions in parens e.g. Henson, A (Chadbourne & Park)
        paren_positions = re.findall(r'\([^(]+\)', name_raw)
        for position in paren_positions:
            extracted_positions.add(position.strip(',#() '))
            name_raw = name_raw.replace(position, '')

        institution_regexes = [

            # TI/CTR
            r'[,#] Tobacco Inst.+$',
            r'[\(\,\#] ?SAB Exec.*$',

            # American Tobacco
            r'[(,#] ?American .+$',
            r'[\(\,\#] ?Amer Brands.*$',
            r'[,#] American Tob',
            r'[,#] Atco.*$',

            # PM
            r'[\(\,\#] ?Philip Morris.*$',

            # RJR
            r'[\(\,\#] ?RJR.*$',

            # LAW FIRMS
            r'[\(\,\#] ?Arnold &.*$',
            r'[\(\,\#] ?Chadbourne.*$',
            r'[,#] COVINGTON [AB&]*.+$',
            r'[,#] Foster [&A]*.+$',
            r'[,#] JACOB [A&]*.+$',

            # Universities
            # match a ( or , or # at the beginning, then some characters that
            # aren't (,# and then "Univ" as a whole word (\b), so that it does
            # not match inside a longer word, e.g. "Universal"
            r'[\(\,\#][^\(\,\#]+ Univ\b.*$',

            # Schools: same structure as the university pattern
            r'[\(\,\#][^\(\,\#]+ School\b.*$',

            # Organizations
            r'[\(\,\#][^\(\,\#]+ Federal Trade Commission.*$',

        ]
        for institution in institution_regexes:
            extracted_institution = re.search(institution, name_raw, re.IGNORECASE)
            if extracted_institution:
                extracted_positions.add(extracted_institution.group().strip(',#() '))
                # cut the name off where the institution starts
                name_raw = name_raw[:extracted_institution.start()]

        # remove #
        name_raw = name_raw.strip("#").strip()

        # initials attached with a dash, e.g. 'BAKER-cj' -> 'BAKER cj'
        # (length guards keep empty or one-character input from raising IndexError)
        if len(name_raw) > 1 and name_raw[-2] == '-':
            name_raw = name_raw[:-2] + " " + name_raw[-1:]
        if len(name_raw) > 2 and name_raw[-3] == '-':
            name_raw = name_raw[:-3] + " " + name_raw[-2:]

        name = HumanName(name_raw)

        # e.g. Dunn W -> parsed as last name W. -> switch first/last
        if len(name.last) <= 2 and len(name.first) > 2:
            name.first, name.last = name.last, name.first

        # remove periods from initials
        if len(name.first) == 2 and name.first[1] == '.':
            name.first = name.first[0]
        if len(name.middle) == 2 and name.middle[1] == '.':
            name.middle = name.middle[0]

        # If first name is length 2 (Teague, CE), the two letters are most likely initials.
        if len(name.first) == 2:
            name.middle = name.first[1].upper()
            name.first = name.first[0].upper()

        # If first and middle initials have periods but not spaces -> separate, e.g. "R.K. Teague"
        if re.match(r'[a-zA-Z]\.[a-zA-Z]\.', name.first):
            name.middle = name.first[2]
            name.first = name.first[0]

        name.last = name.last.capitalize()
        name.first = name.first.capitalize()
        name.middle = name.middle.capitalize()

        # if multiple names are passed, they often end up in the middle name
        # e.g. 'Holtzman, A.,  Murray, J. ,  Henson, A.  -> only allow one comma or set to empty
        if name.middle.count(',') > 1:
            name.middle = ''

        # a very long suffix with many periods is most likely a list of other
        # names rather than a real suffix -> discard it
        if len(name.suffix) > 20 and name.suffix.count('.') > 2:
            name.suffix = ''

        if name.suffix:
            extracted_positions.add(name.suffix)

        return name.first, name.middle, name.last, extracted_positions
Example #16
0
# Let nameparser parse the raw name string.
parsed = HumanName(name)

# Show what the parser made of it.
print("Parsed HumanName for " + name + ":")
print(Person.HumanName_to_str(parsed))

# Create a second, empty HumanName to be filled in by hand.
manual = HumanName()

# Show the empty instance.
print("Empty HumanName?:")
print(Person.HumanName_to_str(manual))

# Set the correct name parts explicitly instead of relying on the parser.
manual.first = "Van"
manual.last = "Conway"

# Show the manually configured instance.
print("after manual configuration:")
print(Person.HumanName_to_str(manual))

# Now try some lookups.

# First lookup: only the raw name string is passed in.
test1 = Person.look_up_person_from_name(name)
print("test1 = " + str(test1))

# Second lookup: the manually configured HumanName is passed in as well.
test2 = Person.look_up_person_from_name(name, manual)
print("test2 = " + str(test2))
Example #17
0
def import_player_data(team, roster_soup):
    """
    Import the roster and shift data of one team for one game.

    The roster table is read from ``roster_soup``; for every roster row a
    PlayerGame is created, creating the Player first when no player with that
    last name, team and number exists. The shift report is then fetched with
    ``get_soup`` and a ShiftGame is created for every shift row. Finally all
    ShiftGame rows belonging to players with position 'G' are deleted.

    :param team: TeamGame-like object providing is_home()/is_away(), the
        home/away relations and ``team``
    :param roster_soup: BeautifulSoup object of the roster report
    :return: True
    :raises Exception: if ``team`` is neither the home nor the away team
    """

    #Determines if the team is the home or away team for the game
    if team.is_home():
        homeaway = "home"
        game_id = team.home.all()[0].game_id
    elif team.is_away():
        homeaway = "away"
        game_id = team.away.all()[0].game_id
    else:
        # Format the message first; applying % to the Exception object itself
        # raised a TypeError instead of the intended error.
        raise Exception(
            "TeamGame object (%s) has no home or away status" % team)

    ##GET ROSTER

    #Indicates the list index of "roster" from which to begin iterating
    START_ROSTER = 3

    #Indices of player data within each player's 'roster' tags
    POSITION_INDEX = 3
    NAME_INDEX = 5
    NUMBER_INDEX = 1

    #Index of this team's roster table among the "#" header cells:
    #1 for the home team, 0 for the away team
    if homeaway == "home":
        roster_index = 1
    else:
        roster_index = 0

    #Sets 'roster' to a BeautifulSoup object which contains the data for a team's roster
    roster = roster_soup.find_all("td", text="#")[roster_index].parent.parent

    #Iterates over the 'roster', skipping every other item (null tags)
    for i in range(START_ROSTER, len(roster), 2):
        row = roster.contents[i].contents

        position = row[POSITION_INDEX].text.encode('utf-8')

        name = row[NAME_INDEX].text.encode('utf-8')
        hname = HumanName(name)
        first_name = hname.first
        last_name = hname.last

        number = row[NUMBER_INDEX].text.encode('utf-8')

        #Checks if player exists or if multiple players with this name.
        #If player does not exist, add to Player database and PlayerGame database.
        #If player exists, add to PlayerGame database.
        try:
            p = Player.objects.get(last_name=last_name,
                                   team=team.team,
                                   number=number)
            PlayerGame.objects.create(player=p, team=team)
        except Player.MultipleObjectsReturned:
            #Check if multiple players in the league or if player has been traded
            #Single-argument call form prints the same text on Python 2 and 3.
            print("Multiple players returned")
        except Player.DoesNotExist:
            #Check if new player or data error
            p = Player.objects.create(first_name=first_name,
                                      last_name=last_name,
                                      position=position,
                                      number=number,
                                      team=team.team)
            PlayerGame.objects.create(player=p, team=team)

    ###GET SHIFTS

    if homeaway == "home":
        shift_soup = get_soup(game_id, 'shifts_home')
    else:
        shift_soup = get_soup(game_id, 'shifts_away')

    #Get list of players from BeautifulSoup object
    players = shift_soup.find_all('td', class_='playerHeading')

    for player in players:
        playerName = HumanName(player.text)

        #Strip out player's number from name: the parsed last name is
        #"<number> <last name>", so split once on the first space.
        #(str.split replaces the Python-2-only string.split function.)
        split_string = playerName.last.split(" ", 1)
        number = split_string[0]
        playerName.last = split_string[1]
        playerObj = Player.objects.get(last_name=playerName.last,
                                       team=team.team,
                                       number=number)

        #Look up the PlayerGame created while importing the roster
        playerGameObj = PlayerGame.objects.get(player=playerObj, team=team)

        #Parse the individual player BeautifulSoup object for shift data
        shift = player.parent.next_sibling.next_sibling.next_sibling.next_sibling
        shift_list = shift.find_all("td")

        #If 6 items in shift_list object, it matches the structure of valid shift data
        while (len(shift_list) == 6):
            period = shift_list[1].text
            if period == "OT":
                period = 4
            else:
                period = int(period)
            timeOn = deleteAfter(shift_list[2].text, '/')
            timeOn = convertToSecs(timeOn, period)
            timeOff = deleteAfter(shift_list[3].text, '/')
            timeOff = convertToSecs(timeOff, period)

            #this is to prevent potential source data issues where the end of a period is mistakenly set to 0:00
            if timeOff == (period - 1) * 1200 and timeOn > (period - 1) * 1200:
                timeOff = period * 1200

            ShiftGame.objects.create(playergame=playerGameObj,
                                     start_time=timeOn,
                                     end_time=timeOff)

            #Get next shift for this player
            shift = shift.next_sibling.next_sibling
            shift_list = shift.find_all("td")

    #don't need to include goalies in line combinations
    ShiftGame.objects.filter(playergame__player__position='G').delete()

    return True
Example #18
0
def import_player_data(team, roster_soup):
    """
    Import the roster and shift data of one team for one game.

    The roster table is read from ``roster_soup``; for every roster row a
    PlayerGame is created, creating the Player first when no player with that
    last name, team and number exists. The shift report is then fetched with
    ``get_soup`` and a ShiftGame is created for every shift row. Finally all
    ShiftGame rows belonging to players with position "G" are deleted.

    :param team: TeamGame-like object providing is_home()/is_away(), the
        home/away relations and ``team``
    :param roster_soup: BeautifulSoup object of the roster report
    :return: True
    :raises Exception: if ``team`` is neither the home nor the away team
    """

    # Determines if the team is the home or away team for the game
    if team.is_home():
        homeaway = "home"
        game_id = team.home.all()[0].game_id
    elif team.is_away():
        homeaway = "away"
        game_id = team.away.all()[0].game_id
    else:
        # Format the message first; applying % to the Exception object itself
        # raised a TypeError instead of the intended error.
        raise Exception("TeamGame object (%s) has no home or away status" % team)

    ## GET ROSTER

    # Indicates the list index of "roster" from which to begin iterating
    START_ROSTER = 3

    # Indices of player data within each player's 'roster' tags
    POSITION_INDEX = 3
    NAME_INDEX = 5
    NUMBER_INDEX = 1

    # Index of this team's roster table among the "#" header cells:
    # 1 for the home team, 0 for the away team
    roster_index = 1 if homeaway == "home" else 0

    # Sets 'roster' to a BeautifulSoup object which contains the data for a team's roster
    roster = roster_soup.find_all("td", text="#")[roster_index].parent.parent

    # Iterates over the 'roster', skipping every other item (null tags)
    for i in range(START_ROSTER, len(roster), 2):
        row = roster.contents[i].contents

        position = row[POSITION_INDEX].text.encode("utf-8")

        name = row[NAME_INDEX].text.encode("utf-8")
        hname = HumanName(name)
        first_name = hname.first
        last_name = hname.last

        number = row[NUMBER_INDEX].text.encode("utf-8")

        # Checks if player exists or if multiple players with this name.
        # If player does not exist, add to Player database and PlayerGame database.
        # If player exists, add to PlayerGame database.
        try:
            p = Player.objects.get(last_name=last_name, team=team.team, number=number)
            PlayerGame.objects.create(player=p, team=team)
        except Player.MultipleObjectsReturned:
            # Check if multiple players in the league or if player has been traded
            # Single-argument call form prints the same text on Python 2 and 3.
            print("Multiple players returned")
        except Player.DoesNotExist:
            # Check if new player or data error
            p = Player.objects.create(
                first_name=first_name, last_name=last_name, position=position, number=number, team=team.team
            )
            PlayerGame.objects.create(player=p, team=team)

    ### GET SHIFTS

    if homeaway == "home":
        shift_soup = get_soup(game_id, "shifts_home")
    else:
        shift_soup = get_soup(game_id, "shifts_away")

    # Get list of players from BeautifulSoup object
    players = shift_soup.find_all("td", class_="playerHeading")

    for player in players:
        playerName = HumanName(player.text)

        # Strip out player's number from name: the parsed last name is
        # "<number> <last name>", so split once on the first space.
        # (str.split replaces the Python-2-only string.split function.)
        split_string = playerName.last.split(" ", 1)
        number = split_string[0]
        playerName.last = split_string[1]
        playerObj = Player.objects.get(last_name=playerName.last, team=team.team, number=number)

        # Look up the PlayerGame created while importing the roster
        playerGameObj = PlayerGame.objects.get(player=playerObj, team=team)

        # Parse the individual player BeautifulSoup object for shift data
        shift = player.parent.next_sibling.next_sibling.next_sibling.next_sibling
        shift_list = shift.find_all("td")

        # If 6 items in shift_list object, it matches the structure of valid shift data
        while len(shift_list) == 6:
            period = shift_list[1].text
            if period == "OT":
                period = 4
            else:
                period = int(period)
            timeOn = deleteAfter(shift_list[2].text, "/")
            timeOn = convertToSecs(timeOn, period)
            timeOff = deleteAfter(shift_list[3].text, "/")
            timeOff = convertToSecs(timeOff, period)

            # this is to prevent potential source data issues where the end of a period is mistakenly set to 0:00
            if timeOff == (period - 1) * 1200 and timeOn > (period - 1) * 1200:
                timeOff = period * 1200

            ShiftGame.objects.create(playergame=playerGameObj, start_time=timeOn, end_time=timeOff)

            # Get next shift for this player
            shift = shift.next_sibling.next_sibling
            shift_list = shift.find_all("td")

    # don't need to include goalies in line combinations
    ShiftGame.objects.filter(playergame__player__position="G").delete()

    return True
    def to_HumanName( self ):

        '''
        Builds a nameparser HumanName() instance from the name information
            stored in this instance and returns it.  Sources are tried in
            this order: the individual name parts, then the full name string,
            then the lookup name.  Returns None if none of them is present.

        preconditions: None.
        postconditions: None.
        '''

        # retrieve all name values from this instance up front.
        name_prefix = self.get( self.PROP_NAME_NAME_PREFIX, None )
        first_name = self.get( self.PROP_NAME_FIRST_NAME, None )
        middle_name = self.get( self.PROP_NAME_MIDDLE_NAME, None )
        last_name = self.get( self.PROP_NAME_LAST_NAME, None )
        name_suffix = self.get( self.PROP_NAME_NAME_SUFFIX, None )
        nickname = self.get( self.PROP_NAME_NICKNAME, None )
        full_name_string = self.get( self.PROP_NAME_FULL_NAME_STRING, None )
        lookup_name = self.get_lookup_name()

        # got name parts?  If so, populate an empty HumanName from them.
        if ( self.got_name_parts() == True ):

            human_name = HumanName()

            # ( HumanName attribute, stored value ) - only non-empty values
            #     are copied over.
            part_values = (
                ( "title", name_prefix ),
                ( "first", first_name ),
                ( "middle", middle_name ),
                ( "last", last_name ),
                ( "suffix", name_suffix ),
                ( "nickname", nickname ),
            )
            for attr_name, attr_value in part_values:

                if ( attr_value ):

                    setattr( human_name, attr_name, attr_value )

                #-- END check to see if value present. --#

            #-- END loop over name parts. --#

            return human_name

        #-- END check to see if name parts. --#

        # no parts - got full name string?  Let HumanName parse it.
        if ( ( full_name_string is not None ) and ( full_name_string != "" ) ):

            return HumanName( full_name_string )

        #-- END check to see if full name string. --#

        # how about lookup name?
        if ( ( lookup_name is not None ) and ( lookup_name != "" ) ):

            return HumanName( lookup_name )

        #-- END check to see if lookup name. --#

        # no names present at all.  Return None.
        return None
        
    #-- END method to_HumanName() --#


#-- END class PersonDetails --#
Example #20
0
    def namer(field):
        """Reduce a raw name field to a ``(first, last)`` tuple.

        ``field`` is either a single string or a tuple of strings (tuple
        members are joined with ", ").  Non-ASCII characters are dropped,
        tabs/newlines are removed and the text is upper-cased before it is
        parsed with ``HumanName``.  A series of heuristics then repairs
        stray initials, "FOR ..." tails and "AND"/"&" joined names before
        the result is re-capitalized.

        Returns ``(first, last)``.  A middle name that is more than a bare
        initial is appended to ``first``; a suffix is appended to ``last``.
        Names containing 'ANONYMOUS' skip the heuristics and are returned
        as parsed.

        NOTE(review): ``encode`` yields bytes on Python 3, which the
        str-based regexes below would reject; this looks like Python 2
        code -- confirm before porting.
        """
        # Leading single capital (optionally dotted) followed by a space.
        leading_initial = r"^[A-Z] |^[A-Z]\. "
        # Matches the whole value when it is nothing but one initial.
        bare_initial = r"^[A-Z]\.|^[A-Z]"

        #pre
        # isinstance (rather than an exact type comparison) also accepts
        # tuple subclasses such as namedtuples.
        if isinstance(field, tuple):
            w_name = re.sub(
                '[\t\r\n]', '',
                ", ".join([x.encode('ascii', 'ignore')
                           for x in field])).upper()
        else:
            w_name = re.sub('[\t\r\n]', '', field.encode('ascii',
                                                         'ignore')).upper()
        if 'ANONYMOUS' not in w_name:
            # Keep the part before the first ';' -- unless the text
            # mentions ' FORMER ', in which case the part after the first
            # ';' is used.  Without any ';' fall back to the whole string
            # instead of raising IndexError.
            parts = w_name.split(";")
            if ' FORMER ' in w_name and len(parts) > 1:
                w_name = parts[1]
            else:
                w_name = parts[0]

            # Drop a space that directly follows or precedes ` ' / or +.
            w_name = re.sub("(?<=[`'/+]) | (?=['`/+])", '', w_name)  #6A, 4A-C

            out = HumanName(w_name)
            out.middle = re.sub(leading_initial, '', out.middle)
            if " " in out.last:
                out.last = re.sub(leading_initial, '', out.last)
            # First name empty or only an initial: promote the middle name.
            if re.sub(bare_initial, '',
                      out.first) == '' and len(out.middle) != 0:
                out.first, out.middle = out.middle, ""
            else:
                out.first = re.sub(leading_initial, '', out.first)

            #post

            if out.middle.startswith(("FOR ", "- ")):  #7A, 1B, 3E
                out.middle = ""

            if " FOR " in out.last:
                out.last = re.sub(" FOR .*", '', out.last)

            # Only a title and a first name were recognised: re-parse a
            # multi-word first name, otherwise treat it as the last name.
            if len(out.last) == 0 and len(out.title) != 0:  #9A
                if " " in out.first:
                    out = HumanName(out.first)
                else:
                    out.first, out.last = "", out.first

            if " AND " in out.middle or " & " in out.middle:
                out.last = re.split("( AND )|( & )", out.middle)[0]
                out.middle = ""
            if "AND" in out.last or "&" in out.last:

                if out.last.startswith(("AND ", "& ")):  #3F
                    out.last = HumanName(out.last).last
                elif " AND " in out.last or " & " in out.last:
                    out.last = re.sub("( AND ).*|( & ).*", '', out.last)
            # Keep only the text before the first joiner in each part.
            out.first = re.split(r"( AND )|&|/|\+", out.first)[0]
            out.last = re.split("/", out.last)[0].strip()
            # First name holds a single capital letter while the last name
            # has several words: take first/last from the last name's
            # first two words.
            if len(re.sub("[^A-Z]", '', out.first)) == 1 and " " in out.last:
                out.first = out.last.split(" ")[0]
                out.last = out.last.split(" ")[1]
            out.capitalize()
            first, last = out.first, out.last
            if len(out.middle) > 0:
                if re.sub(bare_initial, '', out.middle) == '':
                    # A bare middle initial is discarded.
                    out.middle = ""
                elif first.endswith("-") or out.middle.startswith("-"):
                    # Hyphenated given name split across first/middle.
                    first += out.middle
                else:
                    first += " %s" % out.middle  #8A-B
            if len(out.suffix) > 0:
                last += " %s" % out.suffix  #2A
            return (first, last)
        else:
            name = HumanName(w_name)
            return (name.first, name.last)
Example #21
0
    def parse_raw_name(name_raw: str, count: int, extract_orgs=True) -> (str, str, str, Counter):
        """
        Parses a (usually messy) raw name and returns
        first, middle, last names and a Counter of extracted positions

        extract_orgs tries to extract organizations from name. defaults to True. only set to False
        to be able to check if a name is valid (it prevents an infinite loop because by default,
        extracting organizations is part of the initialization of a person)

        Every extracted position (text after " - ", text in parentheses, known raw
        organization strings, and a parsed name suffix) is upper-cased, stripped of
        periods, and added to the returned Counter with weight ``count``.

        :param name_raw: str
        :param count: int, weight added to the Counter for each extracted position
        :param extract_orgs: bool
        :return: str, str, str, Counter (first name, middle name, last name, positions Counter)
        """
        name_raw = Person.remove_privlog_info(name_raw)
        # remove JR, SR, or III if it follows this pattern: 'Chumney-RD-Jr'
        name_raw = Person.remove_jr_sr_iii(name_raw)

        # position is often attached with a dash,
        # e.g. 'BAKER, T E - NATIONAL ASSOCIATION OF ATTORNEYS'
        # Only split when there is exactly one " - " separator.
        extracted_positions = []
        if name_raw.count(' - ') == 1:
            name_raw, extracted_position = name_raw.split(' - ')
            extracted_positions.append(extracted_position.strip())

        # extract positions in parens e.g. Henson, A (Chadbourne & Park)
        for position in re.findall(r'\([^(]+\)', name_raw):
            extracted_positions.append(position.strip(',#() '))
            name_raw = name_raw.replace(position, '')

        # Search for known raw_org strings in name_raw, extract them as positions if necessary
        if extract_orgs:
            name_raw, new_positions = Person.extract_raw_org_names_from_name(name_raw)
            extracted_positions += new_positions

        # delete any leftover hashtags
        name_raw = name_raw.strip(' #')

        # Delete dashes between last name and initials
        # DUNN-W -> Dunn W
        # The length guard keeps empty / one-character leftovers from raising IndexError.
        if len(name_raw) > 1 and name_raw[-2] == '-':
            name_raw = name_raw[:-2] + " " + name_raw[-1:]
        # DUNN-WL -> DUNN WL
        if len(name_raw) > 2 and name_raw[-3] == '-':
            name_raw = name_raw[:-3] + " " + name_raw[-2:]

        # Parse current string using HumanName
        name = HumanName(name_raw)

        # e.g. Dunn W -> parsed as last name W. -> switch first/last
        if len(name.last) <= 2 < len(name.first):
            name.first, name.last = name.last, name.first

        # remove periods from initials
        if len(name.first) == 2 and name.first[1] == '.':
            name.first = name.first[0]
        if len(name.middle) == 2 and name.middle[1] == '.':
            name.middle = name.middle[0]

        # If first name is length 2 (Teague, CE), the two letters are most likely initials.
        if len(name.middle) == 0 and len(name.first) == 2:
            name.middle = name.first[1].upper()
            name.first = name.first[0].upper()

        # If first and middle initials have periods but not spaces -> separate, e.g. "R.K. Teague"
        if re.match(r'[a-zA-Z]\.[a-zA-Z]\.', name.first):
            name.middle = name.first[2]
            name.first = name.first[0]

        name.last = name.last.capitalize()
        name.first = name.first.capitalize()
        name.middle = name.middle.capitalize()

        # if multiple names are passed, they often end up in the middle name
        # e.g. 'Holtzman, A.,  Murray, J. ,  Henson, A.  -> only allow one comma or set to empty
        if name.middle.count(',') > 1:
            name.middle = ''

        # a long suffix with several periods is discarded rather than kept as a position
        if len(name.suffix) > 20 and name.suffix.count('.') > 2:
            name.suffix = ''

        if name.suffix:
            extracted_positions.append(name.suffix)

        # map organization names to clean official names (if they are in the dict) using
        # RAW_ORG_TO_CLEAN_ORG_DICT; entries mapped to '@skip@' are dropped
        clean_orgs = []
        for raw_org in extracted_positions:
            if raw_org in RAW_ORG_TO_CLEAN_ORG_DICT:
                clean_org = RAW_ORG_TO_CLEAN_ORG_DICT[raw_org]
                if clean_org != '@skip@':
                    clean_orgs.append(clean_org)
            else:
                clean_orgs.append(raw_org)

        # convert mapped positions into a counter (periods removed, upper-cased)
        result_positions = Counter()
        for position in clean_orgs:
            result_positions[position.replace('.', '').upper()] += count

        return name.first, name.middle, name.last, result_positions
Example #22
0
def human_to_csl(name):
    """Convert HumanName to CSL-formatted JSON.

    The middle name is appended to the first name, then every entry of
    ``human_to_csl_map`` is used to copy one name attribute (optionally
    transformed by the entry's ``'fun'``) into the result.  Empty
    attributes are left out.

    Note: when a HumanName instance is passed in, it is modified in place
    (first/last/suffix may be reassigned and the middle name is appended
    to ``first``).

    Args:
        name : HumanName or str / unicode
    Returns:
        CSL-formatted JSON

    Examples:
    >>> csl = human_to_csl('Rafael Nadal')
    >>> csl == {'given' : 'Rafael', 'family' : 'Nadal'}
    True
    >>> csl = human_to_csl(HumanName('Rafael Nadal'))
    >>> csl == {'given' : 'Rafael', 'family' : 'Nadal'}
    True
    >>> csl = human_to_csl(HumanName('George HW de Bush'))
    >>> csl == {'given' : 'George H. W.', 'family' : 'de Bush'}
    True
    >>> csl = human_to_csl('Eisenhower, I')
    >>> csl == {'given' : 'I.', 'family' : 'Eisenhower'}
    True
    >>> csl = human_to_csl('Eisenhower, V')
    >>> csl == {'given' : 'V.', 'family' : 'Eisenhower'}
    True
    """
    # Optionally convert to nameparser.HumanName
    if not isinstance(name, HumanName):
        name = HumanName(name)

    # Fix: nameparser treats HumanName('Eisenhower, I') as
    # {first : 'Eisenhower', suffix : 'I'}
    # (raw string so that ``\.`` is a regex escape, not an invalid
    # string escape sequence)
    if re.search(r'^[IV]\.*$', name.suffix):
        name.last = name.first
        name.first = name.suffix
        name.suffix = ''

    # Append middle name to first
    if name.middle:
        name.first += ' ' + name.middle

    csl_data = {}

    # Each map entry names the HumanName attribute to read ('field') and an
    # optional transform ('fun', defaulting to I).
    for lookup, spec in human_to_csl_map.items():
        value = getattr(name, spec['field'])

        # Skip if empty
        if not value:
            continue

        csl_data[lookup] = spec.get('fun', I)(value)

    return csl_data
Example #23
0
	def namer(field):
		"""Reduce a raw name field to a ``(first, last)`` tuple.

		``field`` is either a single string or a tuple of strings (tuple
		members are joined with ", ").  Non-ASCII characters are dropped,
		tabs/newlines are removed and the text is upper-cased before it is
		parsed with ``HumanName``.  A series of heuristics then repairs
		stray initials, "FOR ..." tails and "AND"/"&" joined names before
		the result is re-capitalized.

		Returns ``(first, last)``.  A middle name that is more than a bare
		initial is appended to ``first``; a suffix is appended to ``last``.
		Names containing 'ANONYMOUS' skip the heuristics and are returned
		as parsed.

		NOTE(review): ``encode`` yields bytes on Python 3, which the
		str-based regexes below would reject; this looks like Python 2
		code -- confirm before porting.
		"""
		# Leading single capital (optionally dotted) followed by a space.
		leading_initial = r"^[A-Z] |^[A-Z]\. "
		# Matches the whole value when it is nothing but one initial.
		bare_initial = r"^[A-Z]\.|^[A-Z]"

		#pre
		# isinstance (rather than an exact type comparison) also accepts
		# tuple subclasses such as namedtuples.
		if isinstance(field, tuple):
			w_name = re.sub('[\t\r\n]', '', ", ".join([x.encode('ascii', 'ignore') for x in field])).upper()
		else:
			w_name = re.sub('[\t\r\n]', '', field.encode('ascii', 'ignore')).upper()
		if 'ANONYMOUS' not in w_name:
			# Keep the part before the first ';' -- unless the text
			# mentions ' FORMER ', in which case the part after the first
			# ';' is used.  Without any ';' fall back to the whole string
			# instead of raising IndexError.
			parts = w_name.split(";")
			if ' FORMER ' in w_name and len(parts) > 1:
				w_name = parts[1]
			else:
				w_name = parts[0]

			# Drop a space that directly follows or precedes ` ' / or +.
			w_name = re.sub("(?<=[`'/+]) | (?=['`/+])", '', w_name) #6A, 4A-C

			out = HumanName(w_name)
			out.middle = re.sub(leading_initial, '', out.middle)
			if " " in out.last:
				out.last = re.sub(leading_initial, '', out.last)
			# First name empty or only an initial: promote the middle name.
			if re.sub(bare_initial, '', out.first) == '' and len(out.middle) != 0:
				out.first, out.middle = out.middle, ""
			else:
				out.first = re.sub(leading_initial, '', out.first)

			#post

			if out.middle.startswith(("FOR ", "- ")): #7A, 1B, 3E
				out.middle = ""

			if " FOR " in out.last:
				out.last = re.sub(" FOR .*", '', out.last)

			# Only a title and a first name were recognised: re-parse a
			# multi-word first name, otherwise treat it as the last name.
			if len(out.last) == 0 and len(out.title) != 0: #9A
				if " " in out.first:
					out = HumanName(out.first)
				else:
					out.first, out.last = "", out.first

			if " AND " in out.middle or " & " in out.middle:
				out.last = re.split("( AND )|( & )", out.middle)[0]
				out.middle = ""
			if "AND" in out.last or "&" in out.last:

				if out.last.startswith(("AND ", "& ")): #3F
					out.last = HumanName(out.last).last
				elif " AND " in out.last or " & " in out.last:
					out.last = re.sub("( AND ).*|( & ).*", '', out.last)
			# Keep only the text before the first joiner in each part.
			out.first = re.split(r"( AND )|&|/|\+", out.first)[0]
			out.last = re.split("/", out.last)[0].strip()
			# First name holds a single capital letter while the last name
			# has several words: take first/last from the last name's
			# first two words.
			if len(re.sub("[^A-Z]", '', out.first)) == 1 and " " in out.last:
				out.first = out.last.split(" ")[0]
				out.last = out.last.split(" ")[1]
			out.capitalize()
			first, last = out.first, out.last
			if len(out.middle) > 0:
				if re.sub(bare_initial, '', out.middle) == '':
					# A bare middle initial is discarded.
					out.middle = ""
				elif first.endswith("-") or out.middle.startswith("-"):
					# Hyphenated given name split across first/middle.
					first += out.middle
				else:
					first += " %s" % out.middle #8A-B
			if len(out.suffix) > 0:
				last += " %s" % out.suffix #2A
			return (first, last)
		else:
			name = HumanName(w_name)
			return (name.first, name.last)
Example #24
0

def match(name1, name2):
    """Return True when two raw names share at least one potential form.

    Both names are parsed with ``HumanName``; every candidate produced by
    ``get_potential_names`` for the first name is compared, after
    normalisation with ``u``, against every candidate for the second.
    """
    parsed_left = HumanName(name1)
    parsed_right = HumanName(name2)
    for left_form in get_potential_names(parsed_left):
        for right_form in get_potential_names(parsed_right):
            if u(left_form) == u(right_form):
                return True
    return False


# Load the AP candidate rows.  The csv module expects files to be opened
# with newline='' so that newlines embedded in quoted fields are read
# correctly.
with open('ap_candidates.csv', newline='') as f:
    reader = csv.DictReader(f)
    ap_candidates = list(reader)

# Add a 'name' column holding the full name assembled from the row's
# name-part columns.
for row in ap_candidates:
    n = HumanName()
    n.first = row['first_name']
    n.middle = row['middle_name']
    n.last = row['last_name']
    n.suffix = row['suffix']
    row['name'] = str(n)

# Load the historical AP id rows.  newline='' is what the csv module
# expects for files handed to its readers.
with open('ap_historical_ids.csv', newline='') as f:
    reader = csv.DictReader(f)
    ap_candidates2 = list(reader)


def find(name):
    for row in ap_candidates:
        if match(name, row['name']):
            # print(f'found match for {name} with', row['name'])
            return int(row['pol_id'])