def test_can_ignore_some_speakers(self):
    """Check that Entry.can_ignore_name() accepts every known noise name.

    The test fails if Entry.can_ignore_name() returns a false value for any
    of the names below; the assertion message lists the offending names.
    """
    # These are all names that appear because the parser sometimes gets confused.
    # Rather than fix the parser (very hard) make sure that we ignore these names so
    # that the missing name report is not so long.
    speaker_names = [
        "10 Thursday 10th February, 2011(P) Mr. Kombo",
        "(a)",
        "Act to 58A.",
        "ADJOURNMENT 29 Wednesday, 1st December, 2010 (A) Mr. Deputy Speaker",
        "April 21, 2009 PARLIAMENTARY DEBATES 2 Mr. Speaker",
        "(b)",
        "Cap.114 26.",
        "COMMUNICATION FROM THE CHAIR Mr. Speaker",
        "Deputy Speaker",
        "(i) Energy, Communications and Information Committee",
        "NOTICES OF MOTIONS Mr. Affey",
        "QUORUM Mr. Ahenda",
        "Tellers of Ayes",
        "The Assistant for Lands",
        "The Assistant Minister for Agriculture",
        "The Attorney-General",
        "The Member for Fafi",
        "The Minister for Roads",
    ]

    # Collect every failure so that one run reports all names that were not
    # ignored, rather than stopping at the first.
    not_ignored = [
        name for name in speaker_names if not Entry.can_ignore_name(name)
    ]

    self.assertEqual(
        not_ignored,
        [],
        "Entry.can_ignore_name() returned a false value (expected True) "
        "for: %r" % (not_ignored,),
    )
def create_entries_from_data_and_source(cls, data, source):
    """Create and save a Sitting for ``source`` plus one Entry per transcript line.

    ``data['meta']['venue']`` is used as the slug to look up the Venue;
    ``start_time`` and ``end_time`` are read from ``data['meta']`` and default
    to None. Each item of ``data['transcript']`` becomes an Entry whose
    ``text_counter`` is its 1-based position in the transcript.

    Always returns None.
    """
    meta = data['meta']

    sitting = Sitting(
        source=source,
        venue=Venue.objects.get(slug=meta['venue']),
        start_date=source.date,
        start_time=meta.get('start_time'),
        end_date=source.date,
        end_time=meta.get('end_time'),
    )
    sitting.save()

    with transaction.commit_on_success():
        for position, line in enumerate(data['transcript'], 1):
            Entry(
                sitting=sitting,
                type=line['type'],
                page_number=line['page_number'],
                text_counter=position,
                speaker_name=line.get('speaker_name', ''),
                speaker_title=line.get('speaker_title', ''),
                content=line['text'],
            ).save()

        # NOTE(review): assumed to belong inside the transaction, so the
        # success timestamp is committed together with the entries - confirm.
        source.last_processing_success = datetime.datetime.now()
        source.save()

    return None
def create_entries_from_data_and_source(cls, data, source):
    """Store the sitting described by ``data`` and all of its transcript entries.

    The Venue is fetched by the slug in ``data['meta']['venue']``. The Sitting
    starts and ends on ``source.date``, with optional ``start_time`` and
    ``end_time`` taken from ``data['meta']`` (None when absent). Transcript
    lines are saved in order as Entry rows numbered from 1 via ``text_counter``.

    Always returns None.
    """
    meta = data['meta']
    venue = Venue.objects.get(slug=meta['venue'])

    sitting_fields = dict(
        source=source,
        venue=venue,
        start_date=source.date,
        start_time=meta.get('start_time'),
        end_date=source.date,
        end_time=meta.get('end_time'),
    )
    sitting = Sitting(**sitting_fields)
    sitting.save()

    with transaction.commit_on_success():
        for number, line in enumerate(data['transcript'], 1):
            entry = Entry(
                sitting=sitting,
                type=line['type'],
                page_number=line['page_number'],
                text_counter=number,
                speaker_name=line.get('speaker_name', ''),
                speaker_title=line.get('speaker_title', ''),
                content=line['text'],
            )
            entry.save()

        # NOTE(review): assumed to belong inside the transaction, so the
        # success timestamp is committed together with the entries - confirm.
        source.last_processing_success = datetime.datetime.now()
        source.save()

    return None
def handle_noargs(self, **options):
    """Delegate to Entry.assign_speakers(); the supplied options are not used."""
    Entry.assign_speakers()
def test_assign_speaker_names(self):
    """Test that the speaker names are assigned as expected.

    After each change to the people, positions or aliases in the database,
    Entry.assign_speakers() is re-run and the number of unassigned speeches
    and unassigned aliases is checked.
    """
    # This should really be in a separate file as it is not related to the
    # Kenya parser, but keeping it here for now as it is a step in the
    # parsing flow that is being tested.

    # set up the entries (the returned source is not needed here)
    self._create_source_and_load_test_json_to_entries()

    entry_qs = Entry.objects.all()
    unassigned_aliases_qs = Alias.objects.all().unassigned()

    # check that none of the speakers are assigned
    self.assertEqual(entry_qs.unassigned_speeches().count(), 31)

    # Assign speakers
    Entry.assign_speakers()

    # check that none of the speakers got assigned - no people have been
    # created yet, so there is nobody to match against
    self.assertEqual(entry_qs.unassigned_speeches().count(), 31)
    self.assertEqual(unassigned_aliases_qs.count(), 11)

    # Add a person that should match but don't make them an mp - no match
    james_gabbow = Person.objects.create(
        legal_name='James Gabbow',
        slug='james-gabbow',
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 31)
    self.assertEqual(unassigned_aliases_qs.count(), 11)

    # create the position - check matched
    mp = PositionTitle.objects.create(
        name='Member of Parliament',
        slug='mp',
    )
    Position.objects.create(
        person=james_gabbow,
        title=mp,
        start_date=ApproximateDate(year=2011, month=1, day=1),
        end_date=ApproximateDate(future=True),
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 26)
    self.assertEqual(unassigned_aliases_qs.count(), 10)

    # Add a nominated MP and check it is matched
    nominated_mp = PositionTitle.objects.create(
        name='Nominated MP',
        slug='nominated-member-parliament',
    )
    calist_mwatela = Person.objects.create(
        legal_name='Calist Mwatela',
        slug='calist-mwatela',
    )
    Position.objects.create(
        person=calist_mwatela,
        title=nominated_mp,
        start_date=ApproximateDate(year=2011, month=1, day=1),
        end_date=ApproximateDate(future=True),
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 24)
    self.assertEqual(unassigned_aliases_qs.count(), 9)

    # Add an mp that is no longer current, check not matched.
    # The expired position must belong to bob_musila himself; attaching it
    # to an already-matched person would make this step check nothing.
    bob_musila = Person.objects.create(
        legal_name='Bob Musila',
        slug='bob-musila',
    )
    Position.objects.create(
        person=bob_musila,
        title=mp,
        start_date=ApproximateDate(year=2007, month=1, day=1),
        end_date=ApproximateDate(year=2009, month=1, day=1),
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 24)
    self.assertEqual(unassigned_aliases_qs.count(), 9)

    # Add a name to the aliases and check it is matched
    betty_laboso = Person.objects.create(
        legal_name='Betty Laboso',
        slug='betty-laboso',
    )
    betty_laboso_alias = Alias.objects.get(alias='Dr. Laboso')
    betty_laboso_alias.person = betty_laboso
    betty_laboso_alias.save()
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 22)
    self.assertEqual(unassigned_aliases_qs.count(), 8)

    # Add a name to alias that should be ignored, check not matched but not
    # listed in names any more
    prof_kaloki_alias = Alias.objects.get(alias='Prof. Kaloki')
    prof_kaloki_alias.ignored = True
    prof_kaloki_alias.save()
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 22)
    self.assertEqual(unassigned_aliases_qs.count(), 7)

    # Add all remaining names to alias and check that all matched
    for alias in unassigned_aliases_qs.all():
        alias.person = betty_laboso
        alias.save()
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 8)
    self.assertEqual(unassigned_aliases_qs.count(), 0)
def test_assign_speaker_names(self):
    """Test that the speaker names are assigned as expected.

    After each change to the people, positions or aliases in the database,
    Entry.assign_speakers() is re-run and the number of unassigned speeches
    and unassigned aliases is checked.
    """
    # This should really be in a separate file as it is not related to the
    # Kenya parser, but keeping it here for now as it is a step in the
    # parsing flow that is being tested.

    # set up the entries (the returned source is not needed here)
    self._create_source_and_load_test_json_to_entries()

    entry_qs = Entry.objects.all()
    unassigned_aliases_qs = Alias.objects.all().unassigned()

    # check that none of the speakers are assigned
    self.assertEqual(entry_qs.unassigned_speeches().count(), 31)

    # Assign speakers
    Entry.assign_speakers()

    # check that none of the speakers got assigned - no people have been
    # created yet, so there is nobody to match against
    self.assertEqual(entry_qs.unassigned_speeches().count(), 31)
    self.assertEqual(unassigned_aliases_qs.count(), 11)

    # Add a person that should match but don't make them an mp - no match
    james_gabbow = Person.objects.create(
        legal_name='James Gabbow',
        slug='james-gabbow',
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 31)
    self.assertEqual(unassigned_aliases_qs.count(), 11)

    # create the position - check matched
    mp = PositionTitle.objects.create(
        name='Member of Parliament',
        slug='mp',
    )
    Position.objects.create(
        person=james_gabbow,
        title=mp,
        start_date=ApproximateDate(year=2011, month=1, day=1),
        end_date=ApproximateDate(future=True),
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 26)
    self.assertEqual(unassigned_aliases_qs.count(), 10)

    # Add a nominated MP and check it is matched
    nominated_politician = PositionTitle.objects.create(
        name='Nominated MP',
        slug='nominated-member-parliament',
    )
    calist_mwatela = Person.objects.create(
        legal_name='Calist Mwatela',
        slug='calist-mwatela',
    )
    Position.objects.create(
        person=calist_mwatela,
        title=nominated_politician,
        start_date=ApproximateDate(year=2011, month=1, day=1),
        end_date=ApproximateDate(future=True),
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 24)
    self.assertEqual(unassigned_aliases_qs.count(), 9)

    # Add an mp that is no longer current, check not matched.
    # The expired position must belong to bob_musila himself; attaching it
    # to an already-matched person would make this step check nothing.
    bob_musila = Person.objects.create(
        legal_name='Bob Musila',
        slug='bob-musila',
    )
    Position.objects.create(
        person=bob_musila,
        title=mp,
        start_date=ApproximateDate(year=2007, month=1, day=1),
        end_date=ApproximateDate(year=2009, month=1, day=1),
    )
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 24)
    self.assertEqual(unassigned_aliases_qs.count(), 9)

    # Add a name to the aliases and check it is matched
    betty_laboso = Person.objects.create(
        legal_name='Betty Laboso',
        slug='betty-laboso',
    )
    betty_laboso_alias = Alias.objects.get(alias='Dr. Laboso')
    betty_laboso_alias.person = betty_laboso
    betty_laboso_alias.save()
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 22)
    self.assertEqual(unassigned_aliases_qs.count(), 8)

    # Add a name to alias that should be ignored, check not matched but not
    # listed in names any more
    prof_kaloki_alias = Alias.objects.get(alias='Prof. Kaloki')
    prof_kaloki_alias.ignored = True
    prof_kaloki_alias.save()
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 22)
    self.assertEqual(unassigned_aliases_qs.count(), 7)

    # Add all remaining names to alias and check that all matched
    for alias in unassigned_aliases_qs.all():
        alias.person = betty_laboso
        alias.save()
    Entry.assign_speakers()
    self.assertEqual(entry_qs.unassigned_speeches().count(), 8)
    self.assertEqual(unassigned_aliases_qs.count(), 0)