def create_entries_from_data_and_source(cls, data, source):
    """Create the Sitting for ``source`` and one Entry per transcript line.

    ``data['meta']`` must provide the ``'venue'`` slug and may provide
    ``'start_time'`` and ``'end_time'``.  Each item of ``data['transcript']``
    must provide ``'type'``, ``'page_number'`` and ``'text'``, and may
    provide ``'speaker_name'`` and ``'speaker_title'`` (both default to
    ``''``).

    If ``source.name`` contains ``'Joint Sitting'`` and a Sitting with the
    same venue, source name, start date and start time already exists,
    nothing is created and a message is printed instead.

    The sitting and its entries are saved inside a single transaction, so a
    failure while writing the entries does not leave a sitting without its
    entries behind.  After the transaction, ``source.last_processing_success``
    is set to the current time and the source is saved.

    Always returns None.
    """
    meta = data['meta']
    venue = Venue.objects.get(slug=meta['venue'])
    start_time = meta.get('start_time')

    # Joint Sittings can be published by both Houses (identical documents);
    # prevent the same Sitting being created twice.
    if 'Joint Sitting' in source.name:
        already_imported = Sitting.objects.filter(
            venue=venue,
            source__name=source.name,
            start_date=source.date,
            start_time=start_time,
        ).exists()
        if already_imported:
            print("skipping duplicate source %s for %s"
                  % (source.name, source.date))
            return None

    with transaction.commit_on_success():
        # The sitting is saved inside the transaction so that it is rolled
        # back together with its entries if any entry fails to save.
        sitting = Sitting(
            source=source,
            venue=venue,
            start_date=source.date,
            start_time=start_time,
            end_date=source.date,
            end_time=meta.get('end_time'),
        )
        sitting.save()

        # text_counter is the 1-based position of the line in the transcript.
        for counter, line in enumerate(data['transcript'], 1):
            Entry(
                sitting=sitting,
                type=line['type'],
                page_number=line['page_number'],
                text_counter=counter,
                speaker_name=line.get('speaker_name', ''),
                speaker_title=line.get('speaker_title', ''),
                content=line['text'],
            ).save()

    source.last_processing_success = datetime.datetime.now()
    source.save()

    return None
def test_multiple_politician_name_matches_senate(self):
    # An entry for "Hon. Jones" attached to the senate sitting fixture
    # must yield exactly one candidate speaker: the senator fixture.
    senate_entry = Entry(
        sitting=self.senate_sitting,
        type='text',
        page_number=12,
        text_counter=4,
        speaker_name='Jones',
        speaker_title='Hon.',
        content='test',
    )

    candidates = senate_entry.possible_matching_speakers(
        name_matching_algorithm=NAME_SUBSTRING_MATCH,
    )

    candidate_count = len(candidates)
    self.assertEqual(1, candidate_count)

    only_candidate = candidates[0]
    self.assertEqual(self.senator, only_candidate)
def test_multiple_politician_name_matches_joint_sitting(self):
    # Rename the source fixture so that it reads as a joint sitting, then
    # check that "Hon. Jones" yields two candidate speakers.
    # NOTE(review): presumably self.source is the source behind
    # self.na_sitting -- confirm against setUp.
    joint_source = self.source
    joint_source.name = "Joint Sitting of the Parliament"
    joint_source.save()

    joint_entry = Entry(
        sitting=self.na_sitting,
        type='text',
        page_number=12,
        text_counter=4,
        speaker_name='Jones',
        speaker_title='Hon.',
        content='test',
    )

    candidates = joint_entry.possible_matching_speakers(
        name_matching_algorithm=NAME_SUBSTRING_MATCH,
    )

    candidate_count = len(candidates)
    self.assertEqual(2, candidate_count)
def test_alias_match_score(self):
    # Each case is (first name argument, second name argument, expected
    # score) and is checked in the order listed.
    cases = (
        ('Mr Bob Smith', 'Mr Bob Smith', 3),
        ('Mr Bob Smith', 'Mr Smith', 2),
        ('Mr Bob Smith', 'Bob Smith', 2),
        ('Mr Bob Smith', 'Bob', 1),
        ('Bob Smith', 'Smith, Bob', 2),
        ('Mr Bob Smith', 'Miss Alice Jones', 0),
    )

    for first_name, second_name, expected_score in cases:
        # A fresh, unsaved Entry is used for every comparison.
        score = Entry().alias_match_score(first_name, second_name)
        self.assertEqual(score, expected_score)
def test_exclude_hidden_profiles(self):
    # With the senator fixture marked hidden, "Hon. Jones" in the senate
    # sitting must yield a single candidate, and it must be the mp fixture.
    hidden_senator = self.senator
    hidden_senator.hidden = True
    hidden_senator.save()

    senate_entry = Entry(
        sitting=self.senate_sitting,
        type='text',
        page_number=12,
        text_counter=4,
        speaker_name='Jones',
        speaker_title='Hon.',
        content='test',
    )

    candidates = senate_entry.possible_matching_speakers(
        name_matching_algorithm=NAME_SUBSTRING_MATCH,
    )

    candidate_count = len(candidates)
    self.assertEqual(1, candidate_count)

    remaining_candidate = candidates[0]
    self.assertEqual(self.mp, remaining_candidate)
def create_entries_from_data_and_source(cls, data, source):
    """Create the needed sitting and entries.

    A Sitting is built from ``source`` and ``data['meta']`` (the ``'venue'``
    slug is required; ``'start_time'`` and ``'end_time'`` are optional), and
    one Entry is saved for each item of ``data['transcript']``.  Every
    transcript item must provide ``'type'``, ``'page_number'`` and
    ``'text'``; ``'speaker_name'`` and ``'speaker_title'`` default to ``''``.

    The sitting and its entries are saved in one transaction so that a
    failure part-way through the transcript does not leave a sitting
    without its entries.  Once the transaction has finished,
    ``source.last_processing_success`` is set to the current time and the
    source is saved.

    Always returns None.
    """
    meta = data['meta']
    venue = Venue.objects.get(slug=meta['venue'])

    with transaction.commit_on_success():
        # Saving the sitting inside the transaction means it is rolled back
        # along with the entries if any of them fails to save.
        sitting = Sitting(
            source=source,
            venue=venue,
            start_date=source.date,
            start_time=meta.get('start_time'),
            end_date=source.date,
            end_time=meta.get('end_time'),
        )
        sitting.save()

        # text_counter is the 1-based position of the line in the transcript.
        for counter, line in enumerate(data['transcript'], 1):
            Entry(
                sitting=sitting,
                type=line['type'],
                page_number=line['page_number'],
                text_counter=counter,
                speaker_name=line.get('speaker_name', ''),
                speaker_title=line.get('speaker_title', ''),
                content=line['text'],
            ).save()

    source.last_processing_success = datetime.datetime.now()
    source.save()

    return None
def test_possible_matching_speakers(self):
    # The source, venue and sitting are only instantiated, never saved;
    # the entry under test hangs off that in-memory sitting.
    test_source = Source(
        name='Test source',
        url='http://example.com/foo/bar/testing',
        date=datetime.date(2011, 1, 3),
    )
    test_venue = Venue(
        slug='test-venue',
        name='Test Venue',
    )
    test_sitting = Sitting(
        start_date=datetime.date(2011, 1, 2),
        source=test_source,
        venue=test_venue,
    )
    entry = Entry(sitting=test_sitting)

    # Two people whose names overlap, so that the two matching algorithms
    # can be told apart.
    james_smith = Person.objects.create(
        legal_name='James Smith',
        slug='james-smith',
    )
    james_smith2 = Person.objects.create(
        title='Mr',
        legal_name='Bob Smith James',
        slug='james-smith2',
    )

    mp_title = PositionTitle.objects.create(
        name='Member of Parliament',
        slug='mp',
    )

    # Both people get the same political position, starting 2011-01-01
    # with a "future" end date.
    for holder in (james_smith, james_smith2):
        Position.objects.create(
            person=holder,
            title=mp_title,
            start_date=ApproximateDate(year=2011, month=1, day=1),
            end_date=ApproximateDate(future=True),
            category='political',
        )

    def match_with(algorithm):
        # Run the matcher for the entry's current speaker_name.
        return entry.possible_matching_speakers(
            name_matching_algorithm=algorithm)

    entry.speaker_name = 'James Smith'
    found = match_with(NAME_SUBSTRING_MATCH)
    self.assertListEqual(list(found), [james_smith])

    entry.speaker_name = 'Mr Smith'
    found = match_with(NAME_SUBSTRING_MATCH)
    self.assertItemsEqual(found, (james_smith, james_smith2))
    found = match_with(NAME_SET_INTERSECTION_MATCH)
    self.assertListEqual(list(found), [james_smith2])

    entry.speaker_name = 'Mr James Smith'
    found = match_with(NAME_SUBSTRING_MATCH)
    self.assertListEqual(list(found), [james_smith])
    found = match_with(NAME_SET_INTERSECTION_MATCH)
    self.assertListEqual(list(found), [james_smith2, james_smith])