Esempio n. 1
0
    def possible_matching_speakers(self, update_aliases=False):
        """
        Return the person objects that could be the speaker of this entry.

        The cleaned-up speaker name is looked up in the alias table first:
        an ignored alias gives an empty list, an alias with a person gives
        just that person, and an unassigned alias is treated as if it were
        missing so that the people are searched again. The search keeps the
        people accepted by is_politician() for the sitting's start date whose
        legal name contains (case-insensitively) the speaker name with any
        leading "Xxx. " prefix removed.

        If 'update_aliases' is True (False by default), no alias exists for
        the name, the search did not give exactly one result and the name
        cannot be ignored, then an entry is made in the alias table so that
        the alias is inspected by an admin.
        """

        cleaned_name = Alias.clean_up_name(self.speaker_name)

        # Consult the alias table before searching the people.
        try:
            alias = Alias.objects.get(alias=cleaned_name)

            if alias.ignored:
                # An ignored alias must never match anybody.
                return []
            if alias.person:
                return [alias.person]
            if not alias.is_unassigned:
                return []
            # The alias is unassigned: behave as though it did not exist so
            # that people added to the database since the last run are
            # considered.

        except Alias.DoesNotExist:
            alias = None

        # Remove a leading prefix such as "Mr. " before searching on the name.
        search_term = re.sub(r'^\w+\.\s', '', cleaned_name)

        candidates = Person.objects.all().is_politician(when=self.sitting.start_date)
        candidates = candidates.filter(legal_name__icontains=search_term)
        results = candidates.all()[0:]

        single_match = len(results) == 1

        if alias:
            # An unassigned alias is no longer needed once the name resolves
            # to exactly one person.
            if single_match and alias.is_unassigned:
                alias.delete()
        elif update_aliases and not single_match and not Alias.can_ignore_name(cleaned_name):
            # Record the unresolved name so that an admin can look at it.
            Alias.objects.create(alias=cleaned_name, ignored=False, person=None)

        return results
Esempio n. 2
0
    def possible_matching_speakers(self, update_aliases=False):
        """
        List the people who might be the speaker named on this entry.

        Lookup order:

        1. The alias table, keyed on the cleaned-up speaker name. An ignored
           alias returns [], an alias with a person returns [person], and an
           alias that is neither ignored, assigned nor unassigned returns [].
           An unassigned alias falls through to step 2.
        2. A case-insensitive substring search on legal_name, using the name
           with any leading "Xxx. " prefix dropped, over the people accepted
           by is_politician() for the sitting's start date.

        When 'update_aliases' is True (the default is False) and there is no
        alias for the name, the search was not conclusive (anything other
        than exactly one result) and the name cannot be ignored, a new
        unassigned alias is stored so that an admin can inspect it.
        """

        speaker = Alias.clean_up_name(self.speaker_name)

        try:
            alias = Alias.objects.get(alias=speaker)

            if alias.ignored:
                # Ignored aliases never match anyone.
                return []
            if alias.person:
                return [alias.person]
            if not alias.is_unassigned:
                return []
            # Unassigned: re-run the search below in case new people have
            # been added to the database since the last run.

        except Alias.DoesNotExist:
            alias = None

        # Drop the prefix before matching against legal names.
        without_prefix = re.sub(r'^\w+\.\s', '', speaker)

        politicians = Person.objects.all().is_politician(
            when=self.sitting.start_date)
        results = politicians.filter(
            legal_name__icontains=without_prefix).all()[0:]

        exactly_one = len(results) == 1

        # A single match makes an unassigned alias redundant, so remove it.
        if exactly_one and alias and alias.is_unassigned:
            alias.delete()

        # Only create an alias when asked to, when none exists yet, and when
        # the name is both unresolved and worth an admin's attention.
        needs_alias = (
            not alias
            and update_aliases
            and not exactly_one
            and not Alias.can_ignore_name(speaker)
        )
        if needs_alias:
            Alias.objects.create(
                alias=speaker,
                ignored=False,
                person=None,
            )

        return results
Esempio n. 3
0
    def test_alias_cleanup(self):
        """Alias.clean_up_name should turn each raw name into the listed form."""

        # Each entry is (raw input, expected cleaned-up name).
        expectations = (
            # surrounding whitespace
            ('   Mr. Foo  ', 'Mr. Foo'),
            # trailing comma, with and without spacing after the prefix
            ('Mr. Foo,', 'Mr. Foo'),
            ('Mr.Foo,', 'Mr. Foo'),
            ('Mr.   Foo,', 'Mr. Foo'),
            # enclosing brackets
            ('(Mr. Foo)', 'Mr. Foo'),
            ('[Mr. Foo]', 'Mr. Foo'),
            # prefix without a full stop, and initials
            ('Mr A.N. Other', 'Mr. A. N. Other'),

            # Senators
            ('Hon. Ethuro', 'Hon. Ethuro'),
            ('Sen. (Prof.) Lonyagapuo', 'Prof. Lonyagapuo'),
        )

        for raw, expected in expectations:
            cleaned = Alias.clean_up_name(raw)
            self.assertEqual(cleaned, expected)
Esempio n. 4
0
    def test_alias_cleanup(self):
        """Verify the cleaned-up form Alias.clean_up_name gives for each name."""

        # Pairs of (name as found, name expected after clean-up).
        general_cases = [
            ('   Mr. Foo  ', 'Mr. Foo'),
            ('Mr. Foo,', 'Mr. Foo'),
            ('Mr.Foo,', 'Mr. Foo'),
            ('Mr.   Foo,', 'Mr. Foo'),
            ('(Mr. Foo)', 'Mr. Foo'),
            ('[Mr. Foo]', 'Mr. Foo'),
            ('Mr A.N. Other', 'Mr. A. N. Other'),
        ]

        # Senators
        senator_cases = [
            ('Hon. Ethuro', 'Hon. Ethuro'),
            ('Sen. (Prof.) Lonyagapuo', 'Prof. Lonyagapuo'),
        ]

        # The general cases are checked first, then the senators.
        for found, wanted in general_cases + senator_cases:
            result = Alias.clean_up_name(found)
            self.assertEqual(result, wanted)
Esempio n. 5
0
    def possible_matching_speakers(self, update_aliases=False, name_matching_algorithm=NAME_SET_INTERSECTION_MATCH):
        """
        Return the person objects that might be the speaker.

        The cleaned-up speaker name is first looked up in the alias table. An
        ignored alias gives an empty list and an alias with a person gives
        just that person. Otherwise (no alias, or an unassigned one) the
        non-hidden people accepted by is_politician() for the sitting's start
        date are searched according to 'name_matching_algorithm':

        NAME_SUBSTRING_MATCH
            Keep the people whose legal name contains (case-insensitively)
            the speaker name with any leading "Xxx. " prefix dropped. If more
            than one person matches and the sitting source name does not
            contain 'Joint Sitting', the matches are narrowed to people with
            a position title containing 'Senator' (for the 'Senate' venue) or
            the venue name, provided that leaves at least one person.

        NAME_SET_INTERSECTION_MATCH (the default)
            Keep the people whose "<title> <legal_name>" gets an
            alias_match_score() greater than 1 against the speaker name,
            returned as a list ordered by descending score.

        If 'update_aliases' is True (False by default), there is no alias for
        the name yet, the search did not give exactly one result and the name
        cannot be ignored, then an entry is made in the alias table so that
        the alias is inspected by an admin.
        """

        name = self.speaker_name
        name = Alias.clean_up_name( name )

        # First check for a matching alias that is not ignored
        try:
            alias = Alias.objects.get( alias=name )

            if alias.ignored:
                # if the alias is ignored we should not match anything
                return []
            elif alias.person:
                return [ alias.person ]
            elif alias.is_unassigned:
                # Pretend that this alias does not exist so that it is checked
                # in case new people have been added to the database since the
                # last run.
                pass
            else:
                return []

        except Alias.DoesNotExist:
            alias = None

        person_search = (
            Person
            .objects
            .all()
            .is_politician( when=self.sitting.start_date )
            .exclude(hidden=True)
            .distinct()
        )

        if name_matching_algorithm == NAME_SUBSTRING_MATCH:
            # drop the prefix
            stripped_name = re.sub(r'^\w+\.\s', '', name)
            person_search = person_search.filter(legal_name__icontains=stripped_name)

            # if the results are ambiguous, try restricting to members of the current house
            # unless it's a joint sitting, in which case this is dangerous
            #
            # FIXME: (1) the position filter currently checks whether a person has *ever* held
            #        a qualifying position, would be better if this were a check against
            #        whether the position was held at date of the sitting.
            #
            #        (2) it might also be interesting to have an optional Pombola Organisation
            #        associated with a Sitting so that it would be easier to check whether the
            #        Person has a matching association with an Organisation rather than checking
            #        PositionTitle names (not sure what would happen with Joint Sittings - dual association?)

            if len(person_search) > 1 and 'Joint Sitting' not in self.sitting.source.name:
                if self.sitting.venue.name == 'Senate':
                    current_house = person_search.filter(position__title__name__contains='Senator')
                else:
                    current_house = person_search.filter(position__title__name__contains=self.sitting.venue.name)
                # Only narrow the search when doing so still leaves somebody.
                if current_house:
                    person_search = current_house

        results = person_search.all()[0:]

        if name_matching_algorithm == NAME_SET_INTERSECTION_MATCH:
            # Score every candidate exactly once and reuse that score for
            # both the threshold test and the ordering.
            scored = [
                (self.alias_match_score('%s %s'%(person.title, person.legal_name), name), person)
                for person in results
            ]
            scored = [pair for pair in scored if pair[0] > 1]
            # Sort on the score alone so that people with equal scores keep
            # the order in which the query returned them.
            scored.sort(key=lambda pair: pair[0], reverse=True)
            results = [pair[1] for pair in scored]

        found_one_result = len(results) == 1

        # If there is a single matching speaker and an unassigned alias delete it
        if found_one_result and alias and alias.is_unassigned:
            alias.delete()

        # create an entry in the aliases table if one is needed
        if not alias and update_aliases and not found_one_result and not Alias.can_ignore_name(name):
            Alias.objects.create(
                alias   = name,
                ignored = False,
                person  = None,
            )

        return results
Esempio n. 6
0
    def possible_matching_speakers(
            self,
            update_aliases=False,
            name_matching_algorithm=NAME_SET_INTERSECTION_MATCH):
        """
        Return the person objects that might be the speaker.

        The name used for matching is the speaker name, or the speaker title
        when the first word of the speaker name is 'Nominated'; either way it
        is passed through Alias.clean_up_name() first.

        That name is looked up in the alias table. An ignored alias gives an
        empty list and an alias with a person gives just that person.
        Otherwise (no alias, or an unassigned one) the non-hidden people
        accepted by is_politician() for the sitting's start date are searched
        according to 'name_matching_algorithm':

        NAME_SUBSTRING_MATCH
            Keep the people whose legal name contains (case-insensitively)
            the name with any leading "Xxx. " prefix dropped. If more than
            one person matches and the sitting source name does not contain
            'Joint Sitting', the matches are narrowed to people with a
            position title containing 'Senator' (for the 'Senate' venue) or
            the venue name, provided that leaves at least one person.

        NAME_SET_INTERSECTION_MATCH (the default)
            Keep the people whose "<title> <legal_name>" gets an
            alias_match_score() greater than 1 against the name, returned as
            a list ordered by descending score.

        If nobody matched, a place name and party initials are extracted from
        the name; when both are present the person is looked up by
        constituency and party instead. If that also finds nobody, an alias
        for the name is created (whatever the value of 'update_aliases') and
        an empty list is returned.

        If 'update_aliases' is True (False by default), there is no alias for
        the name yet, the search did not give exactly one result and the name
        cannot be ignored, then an entry is made in the alias table so that
        the alias is inspected by an admin.
        """

        name = self.speaker_name

        # Nominated reps don't have a unique speaker name, so fall back to the speaker title
        if re.split(r'[,\s]+', self.speaker_name)[0] == 'Nominated':
            name = self.speaker_title

        name = Alias.clean_up_name(name)

        # First check for a matching alias that is not ignored
        try:
            alias = Alias.objects.get(alias=name)

            if alias.ignored:
                # if the alias is ignored we should not match anything
                return []
            elif alias.person:
                return [alias.person]
            elif alias.is_unassigned:
                # Pretend that this alias does not exist so that it is checked
                # in case new people have been added to the database since the
                # last run.
                pass
            else:
                return []

        except Alias.DoesNotExist:
            alias = None

        person_search = (Person.objects.all().is_politician(
            when=self.sitting.start_date).exclude(hidden=True).distinct())

        if name_matching_algorithm == NAME_SUBSTRING_MATCH:
            # drop the prefix
            stripped_name = re.sub(r'^\w+\.\s', '', name)
            person_search = person_search.filter(
                legal_name__icontains=stripped_name)

            # if the results are ambiguous, try restricting to members of the current house
            # unless it's a joint sitting, in which case this is dangerous
            #
            # FIXME: (1) the position filter currently checks whether a person has *ever* held
            #        a qualifying position, would be better if this were a check against
            #        whether the position was held at date of the sitting.
            #
            #        (2) it might also be interesting to have an optional Pombola Organisation
            #        associated with a Sitting so that it would be easier to check whether the
            #        Person has a matching association with an Organisation rather than checking
            #        PositionTitle names (not sure what would happen with Joint Sittings - dual association?)

            if len(person_search
                   ) > 1 and 'Joint Sitting' not in self.sitting.source.name:
                if self.sitting.venue.name == 'Senate':
                    current_house = person_search.filter(
                        position__title__name__contains='Senator')
                else:
                    current_house = person_search.filter(
                        position__title__name__contains=self.sitting.venue.name
                    )
                # Only narrow the search when doing so still leaves somebody.
                if current_house:
                    person_search = current_house

        results = person_search.all()[0:]

        if name_matching_algorithm == NAME_SET_INTERSECTION_MATCH:
            # Score every candidate exactly once and reuse that score for
            # both the threshold test and the ordering.
            scored = [
                (self.alias_match_score(
                    '%s %s' % (person.title, person.legal_name), name), person)
                for person in results
            ]
            scored = [pair for pair in scored if pair[0] > 1]
            # Sort on the score alone so that people with equal scores keep
            # the order in which the query returned them.
            scored.sort(key=lambda pair: pair[0], reverse=True)
            results = [pair[1] for pair in scored]

        if not results:
            # Nothing matched on the name itself: try to identify the person
            # from a constituency and party reference in the name instead.
            place_name, party_initials = self.place_name_and_party_initials_from_hansard_name(
                name)
            if place_name and party_initials:
                matches = self.find_person_from_constituency_and_party_reference(
                    place_name, party_initials)
                if matches:
                    results = matches
                else:
                    # Create alias so admins can manually match
                    # NOTE(review): this alias is created even when
                    # 'update_aliases' is False and without consulting
                    # Alias.can_ignore_name() - confirm that is intended.
                    Alias.objects.get_or_create(alias=name)
                    return []

        found_one_result = len(results) == 1

        # If there is a single matching speaker and an unassigned alias delete it
        if found_one_result and alias and alias.is_unassigned:
            alias.delete()

        # create an entry in the aliases table if one is needed
        if not alias and update_aliases and not found_one_result and not Alias.can_ignore_name(
                name):
            Alias.objects.create(
                alias=name,
                ignored=False,
                person=None,
            )

        return results