Beispiel #1
0
    def possible_matching_speakers(self, update_aliases=False):
        """
        Work out which people could be the speaker named on this entry.

        The cleaned-up speaker name is looked up in the alias table first: an
        ignored alias yields no matches and an alias with a person attached
        yields just that person.  Otherwise the people selected by
        is_politician (for the sitting's start date) whose legal name contains
        the speaker name, minus any leading prefix such as "Mr. ", are
        returned.

        When 'update_aliases' is True (it defaults to False), no alias exists
        yet, the search did not find exactly one person and the name cannot be
        ignored, a new unassigned alias is stored so that an admin can
        inspect it.
        """

        cleaned = Alias.clean_up_name(self.speaker_name)

        # The alias table takes precedence over searching by name
        try:
            alias = Alias.objects.get(alias=cleaned)
        except Alias.DoesNotExist:
            alias = None
        else:
            if alias.ignored:
                # ignored aliases must never match anybody
                return []
            if alias.person:
                return [alias.person]
            if not alias.is_unassigned:
                return []
            # An unassigned alias is treated as if it were missing, so the
            # search below is repeated in case people have been added to the
            # database since the last run.

        # remove a leading prefix such as "Mr. "
        without_prefix = re.sub(r'^\w+\.\s', '', cleaned)

        politicians = Person.objects.all().is_politician(when=self.sitting.start_date)
        candidates = politicians.filter(legal_name__icontains=without_prefix).all()[0:]

        if len(candidates) == 1:
            # A unique match makes a pending unassigned alias redundant
            if alias and alias.is_unassigned:
                alias.delete()
        elif not alias and update_aliases and not Alias.can_ignore_name(cleaned):
            # Nothing conclusive was found: leave an alias for an admin
            Alias.objects.create(
                alias=cleaned,
                ignored=False,
                person=None,
            )

        return candidates
Beispiel #2
0
    def possible_matching_speakers(self, update_aliases=False):
        """
        Return the person objects that could be this entry's speaker.

        An alias matching the cleaned-up speaker name decides the outcome when
        it is ignored (no matches) or assigned to a person (that person only).
        In every other case the result is the people selected by is_politician
        for the sitting's start date whose legal name contains the speaker
        name once a leading prefix (for example "Mr. ") has been dropped.

        With 'update_aliases' set to True (the default is False) an
        unassigned alias is created for an admin to inspect, provided that no
        alias exists already, the search was not a unique match and the name
        cannot be ignored.
        """

        speaker = Alias.clean_up_name(self.speaker_name)

        # Look for an existing alias before falling back to a name search
        alias = None
        try:
            alias = Alias.objects.get(alias=speaker)
        except Alias.DoesNotExist:
            pass

        if alias is not None:
            if alias.ignored:
                # an ignored alias should not match anything
                return []
            if alias.person:
                return [alias.person]
            if not alias.is_unassigned:
                return []
            # Unassigned: carry on as though the alias did not exist so that
            # people added since the last run get a chance to match.

        # strip the leading prefix before searching
        search_term = re.sub(r'^\w+\.\s', '', speaker)

        query = Person.objects.all()
        query = query.is_politician(when=self.sitting.start_date)
        query = query.filter(legal_name__icontains=search_term)
        matches = query.all()[0:]

        unique_match = len(matches) == 1

        if unique_match:
            # the unassigned alias is no longer needed once the match is unique
            if alias and alias.is_unassigned:
                alias.delete()
        else:
            # record the name for an admin when asked to and when appropriate
            if not alias and update_aliases:
                if not Alias.can_ignore_name(speaker):
                    Alias.objects.create(
                        alias=speaker,
                        ignored=False,
                        person=None,
                    )

        return matches
Beispiel #3
0
    def test_alias_cleanup(self):
        """Alias.clean_up_name should normalise each raw name as listed"""

        # (raw name, expected cleaned name)
        expectations = (
            ('   Mr. Foo  ', 'Mr. Foo'),
            ('Mr. Foo,', 'Mr. Foo'),
            ('Mr.Foo,', 'Mr. Foo'),
            ('Mr.   Foo,', 'Mr. Foo'),
            ('(Mr. Foo)', 'Mr. Foo'),
            ('[Mr. Foo]', 'Mr. Foo'),
            ('Mr A.N. Other', 'Mr. A. N. Other'),

            # Senators
            ('Hon. Ethuro', 'Hon. Ethuro'),
            ('Sen. (Prof.) Lonyagapuo', 'Prof. Lonyagapuo'),
        )

        for raw_name, expected in expectations:
            cleaned = Alias.clean_up_name(raw_name)
            self.assertEqual(cleaned, expected)
Beispiel #4
0
    def test_alias_cleanup(self):
        """Each dirty name must come out of Alias.clean_up_name as expected"""

        # Kept as two parallel sequences: the dirty inputs ...
        dirty_names = [
            '   Mr. Foo  ',
            'Mr. Foo,',
            'Mr.Foo,',
            'Mr.   Foo,',
            '(Mr. Foo)',
            '[Mr. Foo]',
            'Mr A.N. Other',
            # Senators
            'Hon. Ethuro',
            'Sen. (Prof.) Lonyagapuo',
        ]

        # ... and, position for position, what they should be cleaned to
        clean_names = [
            'Mr. Foo',
            'Mr. Foo',
            'Mr. Foo',
            'Mr. Foo',
            'Mr. Foo',
            'Mr. Foo',
            'Mr. A. N. Other',
            # Senators
            'Hon. Ethuro',
            'Prof. Lonyagapuo',
        ]

        for dirty, clean in zip(dirty_names, clean_names):
            self.assertEqual(Alias.clean_up_name(dirty), clean)
Beispiel #5
0
    def test_can_ignore_some_speakers(self):
        """Check that names produced by parser confusion can all be ignored"""

        # These are all names that appear because the parser sometimes gets confused.
        # Rather than fix the parser (very hard) make sure that we ignore these names so
        # that missing name report is not so long.
        speaker_names = [
            "10 Thursday 10th February, 2011(P) Mr. Kombo",
            "(a)",
            "Act to 58A.",
            "ADJOURNMENT 29 Wednesday, 1st December, 2010 (A) Mr. Deputy Speaker",
            "April 21, 2009 PARLIAMENTARY DEBATES 2 Mr. Speaker",
            "(b)",
            "Cap.114 26.",
            "COMMUNICATION FROM THE CHAIR Mr. Speaker",
            "Deputy Speaker",
            "(i) Energy, Communications and Information Committee",
            "(ii) Local Authorities Committee",
            "(iii) Transport, Public Works and Housing Committee",
            "(iv) Committee on Implementation",
            "NOTICES OF MOTIONS Mr. Affey",
            "QUORUM Mr. Ahenda",
            "Tellers of Ayes",
            "The Assistant for Lands",
            "The Assistant Minister for Agriculture",
            "The Attorney-General",
            "The Member for Fafi",
            "The Minister for Roads",
        ]

        # Check every name before asserting so that one run reports the
        # complete list of offenders instead of stopping at the first one.
        not_ignored = [
            name for name in speaker_names
            if not Alias.can_ignore_name(name)
        ]

        # The offending names go in the assertion message (rather than being
        # printed) so they are shown together with the failure itself.
        self.assertEqual(
            not_ignored,
            [],
            "Got False for Alias.can_ignore_name() on %r, expecting True" % (not_ignored,),
        )
Beispiel #6
0
    def test_can_ignore_some_speakers(self):
        """Check that names produced by parser confusion can all be ignored"""

        # These are all names that appear because the parser sometimes gets confused.
        # Rather than fix the parser (very hard) make sure that we ignore these names so
        # that missing name report is not so long.
        speaker_names = [
            "10 Thursday 10th February, 2011(P) Mr. Kombo",
            "(a)",
            "Act to 58A.",
            "ADJOURNMENT 29 Wednesday, 1st December, 2010 (A) Mr. Deputy Speaker",
            "April 21, 2009 PARLIAMENTARY DEBATES 2 Mr. Speaker",
            "(b)",
            "Cap.114 26.",
            "COMMUNICATION FROM THE CHAIR Mr. Speaker",
            "Deputy Speaker",
            "(i) Energy, Communications and Information Committee",
            "(ii) Local Authorities Committee",
            "(iii) Transport, Public Works and Housing Committee",
            "(iv) Committee on Implementation",
            "NOTICES OF MOTIONS Mr. Affey",
            "QUORUM Mr. Ahenda",
            "Tellers of Ayes",
            "The Assistant for Lands",
            "The Assistant Minister for Agriculture",
            "The Attorney-General",
            "The Member for Fafi",
            "The Minister for Roads",
        ]

        # Gather all names that were wrongly kept; asserting once at the end
        # means a single run lists every name that still needs handling.
        not_ignored = []
        for name in speaker_names:
            if not Alias.can_ignore_name(name):
                not_ignored.append(name)

        # Report through the assertion message instead of printing, so the
        # details appear alongside the failure in the test output.
        self.assertEqual(
            not_ignored,
            [],
            "Got False for Alias.can_ignore_name() on %r, expecting True" % (not_ignored,),
        )
Beispiel #7
0
    def possible_matching_speakers(self, update_aliases=False, name_matching_algorithm=NAME_SET_INTERSECTION_MATCH):
        """
        Return the person objects that might be the speaker.

        The cleaned-up speaker name is first looked up in the alias table: an
        ignored alias gives an empty list and an alias with a person attached
        gives a one-element list holding that person.  Otherwise the
        non-hidden people selected by is_politician for the sitting's start
        date are searched according to 'name_matching_algorithm':

        NAME_SUBSTRING_MATCH
            keep people whose legal name contains the speaker name with any
            leading prefix (e.g. "Mr. ") dropped.  If that is ambiguous and
            this is not a joint sitting, the people are narrowed to the
            current house whenever that leaves at least one person.

        NAME_SET_INTERSECTION_MATCH (the default)
            keep people whose "<title> <legal name>" gets an
            alias_match_score above 1 against the speaker name, ordered
            from the highest score to the lowest.

        Note that the return value is a plain list in set intersection mode
        (and for the alias shortcuts above) and the sliced person query
        otherwise.

        If 'update_aliases' is True (False by default) and the name cannot be
        ignored then an entry will be made in the alias table so that the
        alias is inspected by an admin.  This only happens when no alias
        exists yet and the search did not produce exactly one person.
        """

        name = Alias.clean_up_name(self.speaker_name)

        # First check for a matching alias that is not ignored
        try:
            alias = Alias.objects.get(alias=name)

            if alias.ignored:
                # if the alias is ignored we should not match anything
                return []
            elif alias.person:
                return [alias.person]
            elif alias.is_unassigned:
                # Pretend that this alias does not exist so that it is checked
                # in case new people have been added to the database since the
                # last run.
                pass
            else:
                return []

        except Alias.DoesNotExist:
            alias = None

        person_search = (
            Person
            .objects
            .all()
            .is_politician(when=self.sitting.start_date)
            .exclude(hidden=True)
            .distinct()
        )

        if name_matching_algorithm == NAME_SUBSTRING_MATCH:
            # drop the prefix
            stripped_name = re.sub(r'^\w+\.\s', '', name)
            person_search = person_search.filter(legal_name__icontains=stripped_name)

            # if the results are ambiguous, try restricting to members of the current house
            # unless it's a joint sitting, in which case this is dangerous
            #
            # FIXME: (1) the position filter currently checks whether a person has *ever* held
            #        a qualifying position, would be better if this were a check against
            #        whether the position was held at date of the sitting.
            #
            #        (2) it might also be interesting to have an optional Pombola Organisation
            #        associated with a Sitting so that it would be easier to check whether the
            #        Person has a matching association with an Organisation rather than checking
            #        PositionTitle names (not sure what would happen with Joint Sittings - dual association?)

            if len(person_search) > 1 and 'Joint Sitting' not in self.sitting.source.name:
                if self.sitting.venue.name == 'Senate':
                    current_house = person_search.filter(position__title__name__contains='Senator')
                else:
                    current_house = person_search.filter(position__title__name__contains=self.sitting.venue.name)
                # only narrow the search if somebody is left afterwards
                if current_house:
                    person_search = current_house

        results = person_search.all()[0:]

        if name_matching_algorithm == NAME_SET_INTERSECTION_MATCH:
            # Score every candidate exactly once and reuse that score both
            # for the threshold test and for the ordering.
            scored = []
            for person in results:
                score = self.alias_match_score('%s %s' % (person.title, person.legal_name), name)
                if score > 1:
                    scored.append((score, person))

            # Sort on the score alone: the sort is stable, so people with
            # equal scores keep the order in which the query returned them.
            scored.sort(key=lambda pair: pair[0], reverse=True)
            results = [person for _score, person in scored]

        found_one_result = len(results) == 1

        # If there is a single matching speaker and an unassigned alias delete it
        if found_one_result and alias and alias.is_unassigned:
            alias.delete()

        # create an entry in the aliases table if one is needed
        if not alias and update_aliases and not found_one_result and not Alias.can_ignore_name(name):
            Alias.objects.create(
                alias=name,
                ignored=False,
                person=None,
            )

        return results
Beispiel #8
0
    def possible_matching_speakers(
            self,
            update_aliases=False,
            name_matching_algorithm=NAME_SET_INTERSECTION_MATCH):
        """
        Return the person objects that might be the speaker.

        The name used is the speaker name, or the speaker title when the
        first word of the speaker name is 'Nominated'.  After clean-up it is
        first looked up in the alias table: an ignored alias gives an empty
        list and an alias with a person attached gives a one-element list
        holding that person.  Otherwise the non-hidden people selected by
        is_politician for the sitting's start date are searched according to
        'name_matching_algorithm':

        NAME_SUBSTRING_MATCH
            keep people whose legal name contains the name with any leading
            prefix (e.g. "Mr. ") dropped.  If that is ambiguous and this is
            not a joint sitting, the people are narrowed to the current house
            whenever that leaves at least one person.

        NAME_SET_INTERSECTION_MATCH (the default)
            keep people whose "<title> <legal name>" gets an
            alias_match_score above 1 against the name, ordered from the
            highest score to the lowest.

        If nothing matched, the name is parsed for a place name and party
        initials and those are used to look the person up instead.

        Note that the return value is a plain list in set intersection mode
        (and for the alias shortcuts above), the sliced person query in the
        other mode, and whatever
        find_person_from_constituency_and_party_reference returns when the
        place and party fallback finds a match.

        If 'update_aliases' is True (False by default) and the name cannot be
        ignored then an entry will be made in the alias table so that the
        alias is inspected by an admin.  This only happens when no alias
        exists yet and the search did not produce exactly one person.
        """

        name = self.speaker_name

        # Nominated reps don't have a unique speaker name, so fall back to the speaker title
        if re.split(r'[,\s]+', self.speaker_name)[0] == 'Nominated':
            name = self.speaker_title

        name = Alias.clean_up_name(name)

        # First check for a matching alias that is not ignored
        try:
            alias = Alias.objects.get(alias=name)

            if alias.ignored:
                # if the alias is ignored we should not match anything
                return []
            elif alias.person:
                return [alias.person]
            elif alias.is_unassigned:
                # Pretend that this alias does not exist so that it is checked
                # in case new people have been added to the database since the
                # last run.
                pass
            else:
                return []

        except Alias.DoesNotExist:
            alias = None

        person_search = (Person.objects.all().is_politician(
            when=self.sitting.start_date).exclude(hidden=True).distinct())

        if name_matching_algorithm == NAME_SUBSTRING_MATCH:
            # drop the prefix
            stripped_name = re.sub(r'^\w+\.\s', '', name)
            person_search = person_search.filter(
                legal_name__icontains=stripped_name)

            # if the results are ambiguous, try restricting to members of the current house
            # unless it's a joint sitting, in which case this is dangerous
            #
            # FIXME: (1) the position filter currently checks whether a person has *ever* held
            #        a qualifying position, would be better if this were a check against
            #        whether the position was held at date of the sitting.
            #
            #        (2) it might also be interesting to have an optional Pombola Organisation
            #        associated with a Sitting so that it would be easier to check whether the
            #        Person has a matching association with an Organisation rather than checking
            #        PositionTitle names (not sure what would happen with Joint Sittings - dual association?)

            is_ambiguous = len(person_search) > 1
            if is_ambiguous and 'Joint Sitting' not in self.sitting.source.name:
                if self.sitting.venue.name == 'Senate':
                    current_house = person_search.filter(
                        position__title__name__contains='Senator')
                else:
                    current_house = person_search.filter(
                        position__title__name__contains=self.sitting.venue.name
                    )
                # only narrow the search if somebody is left afterwards
                if current_house:
                    person_search = current_house

        results = person_search.all()[0:]

        if name_matching_algorithm == NAME_SET_INTERSECTION_MATCH:
            # Score every candidate exactly once and reuse that score both
            # for the threshold test and for the ordering.
            scored = []
            for person in results:
                score = self.alias_match_score(
                    '%s %s' % (person.title, person.legal_name), name)
                if score > 1:
                    scored.append((score, person))

            # Sort on the score alone: the sort is stable, so people with
            # equal scores keep the order in which the query returned them.
            scored.sort(key=lambda pair: pair[0], reverse=True)
            results = [person for _score, person in scored]

        if not results:
            # Last resort: identify the person from a place name and party
            # initials embedded in the name.
            place_name, party_initials = self.place_name_and_party_initials_from_hansard_name(
                name)
            if place_name and party_initials:
                matches = self.find_person_from_constituency_and_party_reference(
                    place_name, party_initials)
                if matches:
                    results = matches
                else:
                    # Create alias so admins can manually match
                    # NOTE(review): this happens even when 'update_aliases'
                    # is False, unlike the alias creation further down -
                    # confirm that this is intended.
                    Alias.objects.get_or_create(alias=name)
                    return []

        found_one_result = len(results) == 1

        # If there is a single matching speaker and an unassigned alias delete it
        if found_one_result and alias and alias.is_unassigned:
            alias.delete()

        # create an entry in the aliases table if one is needed
        if (not alias and update_aliases and not found_one_result
                and not Alias.can_ignore_name(name)):
            Alias.objects.create(
                alias=name,
                ignored=False,
                person=None,
            )

        return results