def possible_matching_speakers(self, update_aliases=False):
    """
    Return the person objects that might be the speaker.

    The cleaned-up speaker name is looked up in the alias table first:
    an ignored alias gives no matches, and an alias linked to a person
    gives just that person. An unassigned alias is treated as if it did
    not exist, so the name is searched for again. Failing that, the
    politicians (as of the sitting's start date) whose legal name
    contains the speaker name, minus any leading "Xxx. " prefix, are
    returned.

    If 'update_aliases' is True (False by default), no alias exists for
    the name, the search did not produce exactly one person and the name
    cannot be ignored, then an entry is made in the alias table so that
    the alias is inspected by an admin.
    """
    cleaned_name = Alias.clean_up_name(self.speaker_name)

    # An existing alias may settle the question without any searching.
    try:
        alias = Alias.objects.get(alias=cleaned_name)
        if alias.ignored:
            # Ignored aliases must never match anybody.
            return []
        if alias.person:
            return [alias.person]
        if not alias.is_unassigned:
            return []
        # Unassigned alias: carry on and search again, in case new people
        # have been added to the database since the last run.
    except Alias.DoesNotExist:
        alias = None

    # Strip a leading honorific such as "Mr. " before searching.
    name_without_prefix = re.sub(r'^\w+\.\s', '', cleaned_name)

    politicians = Person.objects.all().is_politician(
        when=self.sitting.start_date
    )
    candidates = politicians.filter(
        legal_name__icontains=name_without_prefix
    )
    results = candidates.all()[0:]

    exactly_one = len(results) == 1

    if alias:
        # A single match makes a lingering unassigned alias redundant.
        if exactly_one and alias.is_unassigned:
            alias.delete()
    elif (
        update_aliases
        and not exactly_one
        and not Alias.can_ignore_name(cleaned_name)
    ):
        # Record the unresolved name so that an admin can deal with it.
        Alias.objects.create(
            alias=cleaned_name,
            ignored=False,
            person=None,
        )

    return results
def possible_matching_speakers(self, update_aliases=False):
    """
    Return the person objects that might be the speaker.

    Lookup order:

    1. The alias table, keyed on the cleaned-up speaker name. An ignored
       alias returns nothing; an alias with a person returns only that
       person; an unassigned alias falls through to step 2; any other
       alias returns nothing.
    2. Politicians (at the sitting's start date) whose legal name
       contains the speaker name with any leading "Xxx. " prefix removed.

    If 'update_aliases' is True (False by default), there is no alias for
    the name, step 2 did not find exactly one person and the name cannot
    be ignored, an entry is made in the alias table so that the alias is
    inspected by an admin.
    """
    speaker = Alias.clean_up_name(self.speaker_name)

    # Step 1: consult the alias table.
    try:
        alias = Alias.objects.get(alias=speaker)
        if alias.ignored:
            # An ignored alias should not match anything.
            return []
        if alias.person:
            return [alias.person]
        if not alias.is_unassigned:
            return []
        # Unassigned: pretend the alias is absent so the name is checked
        # again, as people may have been added since the last run.
    except Alias.DoesNotExist:
        alias = None

    # Step 2: substring search on the name with its prefix dropped.
    search_term = re.sub(r'^\w+\.\s', '', speaker)
    matching_people = (
        Person.objects
        .all()
        .is_politician(when=self.sitting.start_date)
        .filter(legal_name__icontains=search_term)
    )
    results = matching_people.all()[0:]

    unique_hit = len(results) == 1

    if alias:
        # The unassigned alias is no longer needed once the name resolves
        # to a single person.
        if unique_hit and alias.is_unassigned:
            alias.delete()
    elif update_aliases and not unique_hit:
        if not Alias.can_ignore_name(speaker):
            # Create the alias entry that an admin will need to look at.
            Alias.objects.create(alias=speaker, ignored=False, person=None)

    return results
def test_alias_cleanup(self):
    """Alias.clean_up_name should normalise each raw name as listed."""
    # Each entry is (raw name, expected cleaned-up name).
    cases = [
        (' Mr. Foo ', 'Mr. Foo'),
        ('Mr. Foo,', 'Mr. Foo'),
        ('Mr.Foo,', 'Mr. Foo'),
        ('Mr. Foo,', 'Mr. Foo'),
        ('(Mr. Foo)', 'Mr. Foo'),
        ('[Mr. Foo]', 'Mr. Foo'),
        ('Mr A.N. Other', 'Mr. A. N. Other'),

        # Senators
        ('Hon. Ethuro', 'Hon. Ethuro'),
        ('Sen. (Prof.) Lonyagapuo', 'Prof. Lonyagapuo'),
    ]

    for raw_name, expected in cases:
        cleaned = Alias.clean_up_name(raw_name)
        self.assertEqual(cleaned, expected)
def test_alias_cleanup(self):
    """Verify the output of Alias.clean_up_name for a set of raw names."""
    # Pairs of (name as it appears in the source, name after clean up).
    expectations = [
        (' Mr. Foo ', 'Mr. Foo'),
        ('Mr. Foo,', 'Mr. Foo'),
        ('Mr.Foo,', 'Mr. Foo'),
        ('Mr. Foo,', 'Mr. Foo'),
        ('(Mr. Foo)', 'Mr. Foo'),
        ('[Mr. Foo]', 'Mr. Foo'),
        ('Mr A.N. Other', 'Mr. A. N. Other'),

        # Senators
        ('Hon. Ethuro', 'Hon. Ethuro'),
        ('Sen. (Prof.) Lonyagapuo', 'Prof. Lonyagapuo'),
    ]

    for pair in expectations:
        before, after = pair
        self.assertEqual(Alias.clean_up_name(before), after)
def test_can_ignore_some_speakers(self):
    """Check that Alias.can_ignore_name accepts known parser-noise names.

    These are all names that appear because the parser sometimes gets
    confused. Rather than fix the parser (very hard) make sure that we
    ignore these names so that the missing name report is not so long.
    """
    speaker_names = [
        "10 Thursday 10th February, 2011(P) Mr. Kombo",
        "(a)",
        "Act to 58A.",
        "ADJOURNMENT 29 Wednesday, 1st December, 2010 (A) Mr. Deputy Speaker",
        "April 21, 2009 PARLIAMENTARY DEBATES 2 Mr. Speaker",
        "(b)",
        "Cap.114 26.",
        "COMMUNICATION FROM THE CHAIR Mr. Speaker",
        "Deputy Speaker",
        "(i) Energy, Communications and Information Committee",
        "(ii) Local Authorities Committee",
        "(iii) Transport, Public Works and Housing Committee",
        "(iv) Committee on Implementation",
        "NOTICES OF MOTIONS Mr. Affey",
        "QUORUM Mr. Ahenda",
        "Tellers of Ayes",
        "The Assistant for Lands",
        "The Assistant Minister for Agriculture",
        "The Attorney-General",
        "The Member for Fafi",
        "The Minister for Roads",
    ]

    false_count = 0

    for name in speaker_names:
        result = Alias.can_ignore_name(name)
        if not result:
            # Every name is expected to be ignorable, so a falsy result is
            # the failure case. Report each offender rather than stopping
            # at the first one. The parenthesised single-argument print
            # behaves the same as a statement and as a function call.
            print("Got False for Alias.can_ignore_name( '%s' ), expecting True" % name)
            false_count += 1

    self.assertEqual(false_count, 0)
def test_can_ignore_some_speakers(self):
    """Check that Alias.can_ignore_name accepts known parser-noise names.

    These are all names that appear because the parser sometimes gets
    confused. Rather than fix the parser (very hard) make sure that we
    ignore these names so that the missing name report is not so long.
    """
    speaker_names = [
        "10 Thursday 10th February, 2011(P) Mr. Kombo",
        "(a)",
        "Act to 58A.",
        "ADJOURNMENT 29 Wednesday, 1st December, 2010 (A) Mr. Deputy Speaker",
        "April 21, 2009 PARLIAMENTARY DEBATES 2 Mr. Speaker",
        "(b)",
        "Cap.114 26.",
        "COMMUNICATION FROM THE CHAIR Mr. Speaker",
        "Deputy Speaker",
        "(i) Energy, Communications and Information Committee",
        "(ii) Local Authorities Committee",
        "(iii) Transport, Public Works and Housing Committee",
        "(iv) Committee on Implementation",
        "NOTICES OF MOTIONS Mr. Affey",
        "QUORUM Mr. Ahenda",
        "Tellers of Ayes",
        "The Assistant for Lands",
        "The Assistant Minister for Agriculture",
        "The Attorney-General",
        "The Member for Fafi",
        "The Minister for Roads",
    ]

    false_count = 0

    for name in speaker_names:
        result = Alias.can_ignore_name(name)
        if not result:
            # All of these names should be ignorable, so a falsy result is
            # what we report. Keep going so that every offending name is
            # listed. The parenthesised single-argument print behaves the
            # same as a statement and as a function call.
            print("Got False for Alias.can_ignore_name( '%s' ), expecting True" % name)
            false_count += 1

    self.assertEqual(false_count, 0)
def possible_matching_speakers(self, update_aliases=False, name_matching_algorithm=NAME_SET_INTERSECTION_MATCH):
    """
    Return the person objects that might be the speaker.

    The cleaned-up speaker name is looked up in the alias table first:
    an ignored alias gives no matches and an alias linked to a person
    gives just that person. An unassigned alias is treated as absent so
    that the name is searched for again.

    Candidates are the non-hidden politicians at the sitting's start
    date. 'name_matching_algorithm' selects how they are narrowed down:

    * NAME_SUBSTRING_MATCH - keep people whose legal name contains the
      speaker name with any leading "Xxx. " prefix removed.
    * NAME_SET_INTERSECTION_MATCH (the default) - keep people whose
      "<title> <legal name>" scores more than 1 against the speaker name
      according to self.alias_match_score, best score first.

    If 'update_aliases' is True (False by default), no alias exists for
    the name, the search did not produce exactly one person and the name
    cannot be ignored, then an entry is made in the alias table so that
    the alias is inspected by an admin.
    """
    name = self.speaker_name
    name = Alias.clean_up_name(name)

    # First check for a matching alias that is not ignored
    try:
        alias = Alias.objects.get(alias=name)

        if alias.ignored:
            # if the alias is ignored we should not match anything
            return []
        elif alias.person:
            return [alias.person]
        elif alias.is_unassigned:
            # Pretend that this alias does not exist so that it is checked
            # in case new people have been added to the database since the
            # last run.
            pass
        else:
            return []

    except Alias.DoesNotExist:
        alias = None

    person_search = (
        Person
        .objects
        .all()
        .is_politician(when=self.sitting.start_date)
        .exclude(hidden=True)
        .distinct()
    )

    if name_matching_algorithm == NAME_SUBSTRING_MATCH:
        # drop the prefix
        stripped_name = re.sub(r'^\w+\.\s', '', name)
        person_search = person_search.filter(legal_name__icontains=stripped_name)

    # if the results are ambiguous, try restricting to members of the
    # current house unless it's a joint sitting, in which case this is
    # dangerous
    #
    # FIXME: (1) the position filter currently checks whether a person has
    #            *ever* held a qualifying position, would be better if this
    #            were a check against whether the position was held at date
    #            of the sitting.
    #
    #        (2) it might also be interesting to have an optional Pombola
    #            Organisation associated with a Sitting so that it would be
    #            easier to check whether the Person has a matching
    #            association with an Organisation rather than checking
    #            PositionTitle names (not sure what would happen with Joint
    #            Sittings - dual association?)
    if len(person_search) > 1 and 'Joint Sitting' not in self.sitting.source.name:
        if self.sitting.venue.name == 'Senate':
            current_house = person_search.filter(position__title__name__contains='Senator')
        else:
            current_house = person_search.filter(position__title__name__contains=self.sitting.venue.name)

        # Only narrow the search when doing so leaves somebody to match.
        if current_house:
            person_search = current_house

    results = person_search.all()[0:]

    if name_matching_algorithm == NAME_SET_INTERSECTION_MATCH:
        # Score every candidate exactly once, then filter and order by
        # that score. The sort is stable, so people with equal scores
        # keep the order in which the search returned them.
        scored = [
            (self.alias_match_score('%s %s' % (person.title, person.legal_name), name), person)
            for person in results
        ]
        scored = [pair for pair in scored if pair[0] > 1]
        scored.sort(key=lambda pair: pair[0], reverse=True)
        results = [person for _score, person in scored]

    found_one_result = len(results) == 1

    # If there is a single matching speaker and an unassigned alias delete it
    if found_one_result and alias and alias.is_unassigned:
        alias.delete()

    # create an entry in the aliases table if one is needed
    if not alias and update_aliases and not found_one_result and not Alias.can_ignore_name(name):
        Alias.objects.create(
            alias=name,
            ignored=False,
            person=None,
        )

    return results
def possible_matching_speakers(
        self,
        update_aliases=False,
        name_matching_algorithm=NAME_SET_INTERSECTION_MATCH):
    """
    Return the person objects that might be the speaker.

    The name used for matching is the speaker name, or the speaker title
    when the first word of the speaker name is 'Nominated'. After clean
    up it is looked up in the alias table: an ignored alias gives no
    matches and an alias linked to a person gives just that person. An
    unassigned alias is treated as absent so the name is searched again.

    Candidates are the non-hidden politicians at the sitting's start
    date. 'name_matching_algorithm' selects how they are narrowed down:

    * NAME_SUBSTRING_MATCH - keep people whose legal name contains the
      name with any leading "Xxx. " prefix removed.
    * NAME_SET_INTERSECTION_MATCH (the default) - keep people whose
      "<title> <legal name>" scores more than 1 against the name
      according to self.alias_match_score, best score first.

    When nothing matches, a place name and party initials are extracted
    from the name and used for a constituency/party lookup instead.

    If 'update_aliases' is True (False by default) and the name cannot be
    ignored then an entry will be made in the alias table so that the
    alias is inspected by an admin.
    """

    name = self.speaker_name

    # Nominated reps don't have a unique speaker name, so fall back to the speaker title
    if re.split(r'[,\s]+', self.speaker_name)[0] == 'Nominated':
        name = self.speaker_title

    name = Alias.clean_up_name(name)

    # First check for a matching alias that is not ignored
    try:
        alias = Alias.objects.get(alias=name)

        if alias.ignored:
            # if the alias is ignored we should not match anything
            return []
        elif alias.person:
            return [alias.person]
        elif alias.is_unassigned:
            # Pretend that this alias does not exist so that it is checked
            # in case new people have been added to the database since the
            # last run.
            pass
        else:
            return []

    except Alias.DoesNotExist:
        alias = None

    # Base candidate set; the steps below only ever narrow it.
    person_search = (Person.objects.all().is_politician(
        when=self.sitting.start_date).exclude(hidden=True).distinct())

    if name_matching_algorithm == NAME_SUBSTRING_MATCH:
        # drop the prefix
        stripped_name = re.sub(r'^\w+\.\s', '', name)
        person_search = person_search.filter(
            legal_name__icontains=stripped_name)

    # if the results are ambiguous, try restricting to members of the current house
    # unless it's a joint sitting, in which case this is dangerous
    #
    # FIXME: (1) the position filter currently checks whether a person has *ever* held
    #            a qualifying position, would be better if this were a check against
    #            whether the position was held at date of the sitting.
    #
    #        (2) it might also be interesting to have an optional Pombola Organisation
    #            associated with a Sitting so that it would be easier to check whether the
    #            Person has a matching association with an Organisation rather than checking
    #            PositionTitle names (not sure what would happen with Joint Sittings - dual association?)
    if len(person_search) > 1 and 'Joint Sitting' not in self.sitting.source.name:
        if self.sitting.venue.name == 'Senate':
            current_house = person_search.filter(
                position__title__name__contains='Senator')
        else:
            current_house = person_search.filter(
                position__title__name__contains=self.sitting.venue.name)

        # Only narrow the search when doing so leaves somebody to match.
        if current_house:
            person_search = current_house

    results = person_search.all()[0:]

    if name_matching_algorithm == NAME_SET_INTERSECTION_MATCH:
        # Keep candidates scoring above 1 and order them best first.
        # NOTE(review): alias_match_score is called twice per candidate
        # (once for the filter, once for the sort key).
        results = sorted(
            [
                i for i in results
                if self.alias_match_score('%s %s' % (i.title, i.legal_name), name) > 1
            ],
            key=lambda x: self.alias_match_score(
                '%s %s' % (x.title, x.legal_name), name),
            reverse=True,
        )

    # Nothing matched by name: try a constituency/party lookup instead.
    # NOTE(review): the nesting of the 'else' / 'return []' below was
    # reconstructed from whitespace-collapsed text - confirm it against
    # version control.
    if len(results) == 0:
        place_name, party_initials = self.place_name_and_party_initials_from_hansard_name(
            name)
        if place_name and party_initials:
            matches = self.find_person_from_constituency_and_party_reference(
                place_name, party_initials)
            if matches:
                results = matches
            else:
                # Create alias so admins can manually match
                # NOTE(review): this happens regardless of 'update_aliases'
                # and Alias.can_ignore_name - confirm that is intended.
                Alias.objects.get_or_create(alias=name)
                return []

    found_one_result = len(results) == 1

    # If there is a single matching speaker and an unassigned alias delete it
    if found_one_result and alias and alias.is_unassigned:
        alias.delete()

    # create an entry in the aliases table if one is needed
    if not alias and update_aliases and not found_one_result and not Alias.can_ignore_name(
            name):
        Alias.objects.create(
            alias=name,
            ignored=False,
            person=None,
        )

    return results