Ejemplo n.º 1
0
    def test_possible_matching_speakers(self):
        """Check the people each name-matching algorithm offers for a speaker name.

        Two people are created, both holding the same 'Member of Parliament'
        position, and an unsaved entry is queried with several speaker names.
        """
        sitting = Sitting(start_date=datetime.date(2011, 1, 2))
        entry = Entry(sitting=sitting)

        james_smith = Person.objects.create(
            legal_name='James Smith',
            slug='james-smith',
        )
        james_smith2 = Person.objects.create(
            title='Mr',
            legal_name='Bob Smith James',
            slug='james-smith2',
        )

        mp = PositionTitle.objects.create(
            name='Member of Parliament',
            slug='mp',
        )

        # Both people get an identical position (same title, dates, category).
        for holder in (james_smith, james_smith2):
            Position.objects.create(
                person=holder,
                title=mp,
                start_date=ApproximateDate(year=2011, month=1, day=1),
                end_date=ApproximateDate(future=True),
                category='political',
            )

        def candidates(algorithm):
            # Query the entry with whatever speaker_name is currently set.
            return entry.possible_matching_speakers(
                name_matching_algorithm=algorithm)

        entry.speaker_name = 'James Smith'
        self.assertListEqual(
            list(candidates(NAME_SUBSTRING_MATCH)), [james_smith])

        entry.speaker_name = 'Mr Smith'
        self.assertItemsEqual(
            candidates(NAME_SUBSTRING_MATCH), (james_smith, james_smith2))
        self.assertListEqual(
            list(candidates(NAME_SET_INTERSECTION_MATCH)), [james_smith2])

        entry.speaker_name = 'Mr James Smith'
        self.assertListEqual(
            list(candidates(NAME_SUBSTRING_MATCH)), [james_smith])
        self.assertListEqual(
            list(candidates(NAME_SET_INTERSECTION_MATCH)),
            [james_smith2, james_smith])
Ejemplo n.º 2
0
    def create_entries_from_data_and_source(cls, data, source):
        """Create the sitting for ``source`` and one entry per transcript line.

        ``data['meta']`` supplies the venue slug and the optional start and
        end times; ``data['transcript']`` supplies the lines. Entries are
        numbered from 1 in transcript order. Always returns None.
        """
        meta = data['meta']
        venue = Venue.objects.get(slug=meta['venue'])
        start_time = meta.get('start_time', None)

        # Joint Sittings can be published by both Houses (identical
        # documents), so do not create the same Sitting twice.
        if 'Joint Sitting' in source.name:
            already_stored = Sitting.objects.filter(
                venue=venue,
                source__name=source.name,
                start_date=source.date,
                start_time=start_time,
            ).exists()
            if already_stored:
                print("skipping duplicate source %s for %s" % (source.name,
                                                               source.date))
                return None

        # NOTE(review): the sitting is saved before the transaction block
        # below is entered - confirm that this is intended.
        sitting = Sitting(
            source=source,
            venue=venue,
            start_date=source.date,
            start_time=start_time,
            end_date=source.date,
            end_time=meta.get('end_time', None),
        )
        sitting.save()

        with transaction.commit_on_success():
            for counter, line in enumerate(data['transcript'], 1):
                Entry(
                    sitting=sitting,
                    type=line['type'],
                    page_number=line['page_number'],
                    text_counter=counter,
                    speaker_name=line.get('speaker_name', ''),
                    speaker_title=line.get('speaker_title', ''),
                    content=line['text'],
                ).save()

            # Record when this source was processed.
            source.last_processing_success = datetime.datetime.now()
            source.save()

        return None
Ejemplo n.º 3
0
    def test_multiple_politician_name_matches_senate(self):
        """An entry in the senate sitting named 'Jones' matches only the senator."""
        entry_fields = dict(
            sitting=self.senate_sitting,
            type='text',
            page_number=12,
            text_counter=4,
            speaker_name='Jones',
            speaker_title='Hon.',
            content='test',
        )
        matches = Entry(**entry_fields).possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH)

        self.assertEqual(1, len(matches))
        self.assertEqual(self.senator, matches[0])
Ejemplo n.º 4
0
    def create_entries_from_data_and_source(cls, data, source):
        """Create the sitting for ``source`` and one entry per transcript line.

        The venue slug and the optional start/end times are read from
        ``data['meta']``, the lines from ``data['transcript']``. Entries get
        a ``text_counter`` starting at 1. Always returns None.
        """
        meta = data['meta']
        venue = Venue.objects.get(slug=meta['venue'])
        start_time = meta.get('start_time', None)

        # Joint Sittings can be published by both Houses (identical
        # documents), so do not create the same Sitting twice.
        is_joint = 'Joint Sitting' in source.name
        if is_joint and Sitting.objects.filter(
                venue=venue,
                source__name=source.name,
                start_date=source.date,
                start_time=start_time).exists():
            print("skipping duplicate source %s for %s" % (source.name, source.date))
            return None

        # NOTE(review): the sitting is saved before the transaction block
        # below is entered - confirm that this is intended.
        sitting = Sitting(
            source=source,
            venue=venue,
            start_date=source.date,
            start_time=start_time,
            end_date=source.date,
            end_time=meta.get('end_time', None),
        )
        sitting.save()

        with transaction.commit_on_success():
            for position, line in enumerate(data['transcript'], 1):
                entry = Entry(
                    sitting=sitting,
                    type=line['type'],
                    page_number=line['page_number'],
                    text_counter=position,
                    speaker_name=line.get('speaker_name', ''),
                    speaker_title=line.get('speaker_title', ''),
                    content=line['text'],
                )
                entry.save()

            # Record when this source was processed.
            source.last_processing_success = datetime.datetime.now()
            source.save()

        return None
Ejemplo n.º 5
0
    def test_multiple_politician_name_matches_joint_sitting(self):
        """With the source renamed to a joint sitting, 'Jones' has two matches."""
        self.source.name = "Joint Sitting of the Parliament"
        self.source.save()

        entry = Entry(
            sitting=self.na_sitting,
            type='text',
            page_number=12,
            text_counter=4,
            speaker_name='Jones',
            speaker_title='Hon.',
            content='test',
        )
        matches = entry.possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH,
        )
        self.assertEqual(2, len(matches))
Ejemplo n.º 6
0
    def test_multiple_politician_name_matches_joint_sitting(self):
        """Rename the source to a joint sitting and expect two 'Jones' matches."""
        joint_source = self.source
        joint_source.name = "Joint Sitting of the Parliament"
        joint_source.save()

        entry_fields = dict(
            sitting=self.na_sitting,
            type='text',
            page_number=12,
            text_counter=4,
            speaker_name='Jones',
            speaker_title='Hon.',
            content='test',
        )
        found = Entry(**entry_fields).possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH)
        self.assertEqual(2, len(found))
Ejemplo n.º 7
0
    def test_multiple_politician_name_matches_senate(self):
        """For the senate sitting, 'Jones' resolves to the senator alone."""
        entry = Entry(
            sitting=self.senate_sitting,
            type='text',
            page_number=12,
            text_counter=4,
            speaker_name='Jones',
            speaker_title='Hon.',
            content='test',
        )
        found = entry.possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH,
        )

        self.assertEqual(1, len(found))
        self.assertEqual(self.senator, found[0])
Ejemplo n.º 8
0
    def test_exclude_hidden_profiles(self):
        """Once the senator is hidden, 'Jones' matches only ``self.mp``."""
        hidden_person = self.senator
        hidden_person.hidden = True
        hidden_person.save()

        entry_fields = dict(
            sitting=self.senate_sitting,
            type='text',
            page_number=12,
            text_counter=4,
            speaker_name='Jones',
            speaker_title='Hon.',
            content='test',
        )
        matches = Entry(**entry_fields).possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH)

        self.assertEqual(1, len(matches))
        self.assertEqual(self.mp, matches[0])
Ejemplo n.º 9
0
    def test_exclude_hidden_profiles(self):
        """After hiding the senator, the single match for 'Jones' is ``self.mp``."""
        self.senator.hidden = True
        self.senator.save()

        entry = Entry(
            sitting=self.senate_sitting,
            type='text',
            page_number=12,
            text_counter=4,
            speaker_name='Jones',
            speaker_title='Hon.',
            content='test',
        )
        found = entry.possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH,
        )

        self.assertEqual(1, len(found))
        self.assertEqual(self.mp, found[0])
Ejemplo n.º 10
0
    def create_entries_from_data_and_source(cls, data, source):
        """Create the sitting for ``source`` and one entry per transcript line.

        The venue slug and the optional start/end times come from
        ``data['meta']``; the lines come from ``data['transcript']`` and are
        numbered from 1 in order. Always returns None.
        """
        meta = data['meta']
        venue = Venue.objects.get(slug=meta['venue'])

        # NOTE(review): the sitting is saved before the transaction block
        # below is entered - confirm that this is intended.
        sitting = Sitting(
            source=source,
            venue=venue,
            start_date=source.date,
            start_time=meta.get('start_time', None),
            end_date=source.date,
            end_time=meta.get('end_time', None),
        )
        sitting.save()

        with transaction.commit_on_success():
            for counter, line in enumerate(data['transcript'], 1):
                Entry(
                    sitting=sitting,
                    type=line['type'],
                    page_number=line['page_number'],
                    text_counter=counter,
                    speaker_name=line.get('speaker_name', ''),
                    speaker_title=line.get('speaker_title', ''),
                    content=line['text'],
                ).save()

            # Record when this source was processed.
            source.last_processing_success = datetime.datetime.now()
            source.save()

        return None
Ejemplo n.º 11
0
 def test_alias_match_score(self):
     """Check ``alias_match_score`` against a table of name pairs and scores."""
     expectations = (
         ('Mr Bob Smith', 'Mr Bob Smith', 3),
         ('Mr Bob Smith', 'Mr Smith', 2),
         ('Mr Bob Smith', 'Bob Smith', 2),
         ('Mr Bob Smith', 'Bob', 1),
         ('Bob Smith', 'Smith, Bob', 2),
         ('Mr Bob Smith', 'Miss Alice Jones', 0),
     )
     for first, second, expected in expectations:
         # A fresh Entry is used for every comparison.
         self.assertEqual(
             Entry().alias_match_score(first, second), expected)
Ejemplo n.º 12
0
    def test_assign_speaker_names(self):
        """Test that the speaker names are assigned as expected.

        Loads the test JSON into entries, then changes the people, positions
        and aliases step by step. After each step ``Entry.assign_speakers``
        is re-run and the numbers of still-unassigned speeches and aliases
        are checked.
        """

        # This should really be in a separate file as it is not related to the
        # Kenya parser, but keeping it here for now as it is a step in the
        # parsing flow that is being tested.

        # set up the entries (the returned source is not needed here)
        self._create_source_and_load_test_json_to_entries()

        entry_qs = Entry.objects.all()
        unassigned_aliases_qs = Alias.objects.all().unassigned()

        def assign_and_check(speeches, aliases):
            # Re-run the assignment, then check both unassigned counts.
            Entry.assign_speakers(
                name_matching_algorithm=settings.HANSARD_NAME_MATCHING_ALGORITHM)
            self.assertEqual(entry_qs.unassigned_speeches().count(), speeches)
            self.assertEqual(unassigned_aliases_qs.count(), aliases)

        # check that none of the speakers are assigned
        self.assertEqual(entry_qs.unassigned_speeches().count(), 31)

        # Assign speakers - nothing should match, as this test has not
        # created any people yet
        assign_and_check(31, 11)

        # Add a person that should match but don't make them an mp - no match
        james_gabbow = Person.objects.create(
            legal_name='James Gabbow',
            slug='james-gabbow',
        )
        assign_and_check(31, 11)

        # create the position - check matched
        mp = PositionTitle.objects.create(
            name='Member of Parliament',
            slug='mp',
        )
        Position.objects.create(
            person=james_gabbow,
            title=mp,
            start_date=ApproximateDate(year=2011, month=1, day=1),
            end_date=ApproximateDate(future=True),
            category='political',
        )
        assign_and_check(26, 10)

        # Add a nominated MP and check it is matched
        nominated_politician = PositionTitle.objects.create(
            name='Nominated MP',
            slug='nominated-member-parliament',
        )
        calist_mwatela = Person.objects.create(
            legal_name='Calist Mwatela',
            slug='calist-mwatela',
        )
        Position.objects.create(
            person=calist_mwatela,
            title=nominated_politician,
            start_date=ApproximateDate(year=2011, month=1, day=1),
            end_date=ApproximateDate(future=True),
            category='political',
        )
        assign_and_check(24, 9)

        # Add an mp that is no longer current, check not matched.
        # The expired position must belong to bob_musila (it was previously
        # attached to james_gabbow, which left bob_musila without any
        # position and so did not exercise the date check).
        bob_musila = Person.objects.create(
            legal_name='Bob Musila',
            slug='bob-musila',
        )
        Position.objects.create(
            person=bob_musila,
            title=mp,
            start_date=ApproximateDate(year=2007, month=1, day=1),
            end_date=ApproximateDate(year=2009, month=1, day=1),
            category='political',
        )
        assign_and_check(24, 9)

        # Add a name to the aliases and check it is matched
        betty_laboso = Person.objects.create(
            legal_name='Betty Laboso',
            slug='betty-laboso',
        )
        betty_laboso_alias = Alias.objects.get(alias='Dr. Laboso')
        betty_laboso_alias.person = betty_laboso
        betty_laboso_alias.save()
        assign_and_check(22, 8)

        # Add a name to alias that should be ignored, check not matched but
        # not listed in names any more
        prof_kaloki_alias = Alias.objects.get(alias='Prof. Kaloki')
        prof_kaloki_alias.ignored = True
        prof_kaloki_alias.save()
        assign_and_check(22, 7)

        # Add all remaining names to alias and check that all matched
        for alias in unassigned_aliases_qs.all():
            alias.person = betty_laboso
            alias.save()
        assign_and_check(8, 0)
Ejemplo n.º 13
0
    def handle_noargs(self, **options):
        """Run ``Entry.assign_speakers`` with no arguments; ``options`` is unused."""
        Entry.assign_speakers()
Ejemplo n.º 14
0
    def handle_noargs(self, **options):
        """Call ``Entry.assign_speakers()``; the command options are not read."""
        Entry.assign_speakers()
Ejemplo n.º 15
0
    def test_possible_matching_speakers(self):
        """Check the people each name-matching algorithm offers for a speaker name.

        Two people are created, both holding the same 'Member of Parliament'
        position. The entry, sitting, source and venue are built in memory
        only and are never saved.
        """
        sitting = Sitting(
            start_date=datetime.date(2011, 1, 2),
            source=Source(
                name='Test source',
                url='http://example.com/foo/bar/testing',
                date=datetime.date(2011, 1, 3),
            ),
            venue=Venue(
                slug='test-venue',
                name='Test Venue',
            ),
        )
        entry = Entry(sitting=sitting)

        james_smith = Person.objects.create(
            legal_name='James Smith',
            slug='james-smith',
        )
        james_smith2 = Person.objects.create(
            title='Mr',
            legal_name='Bob Smith James',
            slug='james-smith2',
        )

        mp = PositionTitle.objects.create(
            name='Member of Parliament',
            slug='mp',
        )

        # Both people get an identical position (same title, dates, category).
        for holder in (james_smith, james_smith2):
            Position.objects.create(
                person=holder,
                title=mp,
                start_date=ApproximateDate(year=2011, month=1, day=1),
                end_date=ApproximateDate(future=True),
                category='political',
            )

        def candidates(algorithm):
            # Query the entry with whatever speaker_name is currently set.
            return entry.possible_matching_speakers(
                name_matching_algorithm=algorithm)

        entry.speaker_name = 'James Smith'
        self.assertListEqual(
            list(candidates(NAME_SUBSTRING_MATCH)), [james_smith])

        entry.speaker_name = 'Mr Smith'
        self.assertItemsEqual(
            candidates(NAME_SUBSTRING_MATCH), (james_smith, james_smith2))
        self.assertListEqual(
            list(candidates(NAME_SET_INTERSECTION_MATCH)), [james_smith2])

        entry.speaker_name = 'Mr James Smith'
        self.assertListEqual(
            list(candidates(NAME_SUBSTRING_MATCH)), [james_smith])
        self.assertListEqual(
            list(candidates(NAME_SET_INTERSECTION_MATCH)),
            [james_smith2, james_smith])
Ejemplo n.º 16
0
    def test_assign_speaker_names(self):
        """Test that the speaker names are assigned as expected.

        Loads the test JSON into entries, then changes the people, positions
        and aliases step by step. After each step ``Entry.assign_speakers``
        is re-run and the numbers of still-unassigned speeches and aliases
        are checked.
        """

        # This should really be in a separate file as it is not related to the
        # Kenya parser, but keeping it here for now as it is a step in the
        # parsing flow that is being tested.

        # set up the entries (the returned source is not needed here)
        self._create_source_and_load_test_json_to_entries()

        entry_qs = Entry.objects.all()
        unassigned_aliases_qs = Alias.objects.all().unassigned()

        def assign_and_check(speeches, aliases):
            # Re-run the assignment, then check both unassigned counts.
            Entry.assign_speakers()
            self.assertEqual(entry_qs.unassigned_speeches().count(), speeches)
            self.assertEqual(unassigned_aliases_qs.count(), aliases)

        # check that none of the speakers are assigned
        self.assertEqual(entry_qs.unassigned_speeches().count(), 31)

        # Assign speakers - nothing should match, as this test has not
        # created any people yet
        assign_and_check(31, 11)

        # Add a person that should match but don't make them an mp - no match
        james_gabbow = Person.objects.create(
            legal_name='James Gabbow',
            slug='james-gabbow',
        )
        assign_and_check(31, 11)

        # create the position - check matched
        mp = PositionTitle.objects.create(
            name='Member of Parliament',
            slug='mp',
        )
        Position.objects.create(
            person=james_gabbow,
            title=mp,
            start_date=ApproximateDate(year=2011, month=1, day=1),
            end_date=ApproximateDate(future=True),
        )
        assign_and_check(26, 10)

        # Add a nominated MP and check it is matched
        nominated_politician = PositionTitle.objects.create(
            name='Nominated MP',
            slug='nominated-member-parliament',
        )
        calist_mwatela = Person.objects.create(
            legal_name='Calist Mwatela',
            slug='calist-mwatela',
        )
        Position.objects.create(
            person=calist_mwatela,
            title=nominated_politician,
            start_date=ApproximateDate(year=2011, month=1, day=1),
            end_date=ApproximateDate(future=True),
        )
        assign_and_check(24, 9)

        # Add an mp that is no longer current, check not matched.
        # The expired position must belong to bob_musila (it was previously
        # attached to james_gabbow, which left bob_musila without any
        # position and so did not exercise the date check).
        bob_musila = Person.objects.create(
            legal_name='Bob Musila',
            slug='bob-musila',
        )
        Position.objects.create(
            person=bob_musila,
            title=mp,
            start_date=ApproximateDate(year=2007, month=1, day=1),
            end_date=ApproximateDate(year=2009, month=1, day=1),
        )
        assign_and_check(24, 9)

        # Add a name to the aliases and check it is matched
        betty_laboso = Person.objects.create(
            legal_name='Betty Laboso',
            slug='betty-laboso',
        )
        betty_laboso_alias = Alias.objects.get(alias='Dr. Laboso')
        betty_laboso_alias.person = betty_laboso
        betty_laboso_alias.save()
        assign_and_check(22, 8)

        # Add a name to alias that should be ignored, check not matched but
        # not listed in names any more
        prof_kaloki_alias = Alias.objects.get(alias='Prof. Kaloki')
        prof_kaloki_alias.ignored = True
        prof_kaloki_alias.save()
        assign_and_check(22, 7)

        # Add all remaining names to alias and check that all matched
        for alias in unassigned_aliases_qs.all():
            alias.person = betty_laboso
            alias.save()
        assign_and_check(8, 0)
Ejemplo n.º 17
0
 def handle_noargs(self, **options):
     """Assign speakers using the algorithm named in the settings."""
     Entry.assign_speakers(
         name_matching_algorithm=settings.HANSARD_NAME_MATCHING_ALGORITHM)
Ejemplo n.º 18
0
 def handle_noargs(self, **options):
     """Run ``Entry.assign_speakers`` with ``settings.HANSARD_NAME_MATCHING_ALGORITHM``."""
     Entry.assign_speakers(
         name_matching_algorithm=settings.HANSARD_NAME_MATCHING_ALGORITHM,
     )