def setUp(self):
    """Build the fixtures shared by the tests.

    Saves a source, the National Assembly and Senate venues, one position
    title per house, a place, and two people who each hold a political
    position at that place.  The two sittings stored on ``self`` are only
    instantiated here; they are not saved.
    """
    sitting_day = date(2011, 11, 15)

    self.source = Source.objects.create(date=sitting_day, name='test source')

    national_assembly = Venue.objects.create(
        name='National Assembly',
        slug='national-assembly',
    )
    senate_venue = Venue.objects.create(name='Senate', slug='senate')

    # Unsaved sittings, one per house, both on the same day and source.
    self.na_sitting = Sitting(
        source=self.source,
        venue=national_assembly,
        start_date=sitting_day,
    )
    self.senate_sitting = Sitting(
        source=self.source,
        venue=senate_venue,
        start_date=sitting_day,
    )

    mp_title = PositionTitle.objects.create(
        name='Member of the National Assembly',
        slug='member-national-assembly',
    )
    senator_title = PositionTitle.objects.create(name='Senator', slug='senator')

    kind = PlaceKind.objects.create(name='Test')
    place = Place.objects.create(name="Some Place", slug='some_place', kind=kind)

    self.senator = Person.objects.create(legal_name='Tom Jones', slug='tom-jones')
    self.mp = Person.objects.create(legal_name='Paul Jones', slug='paul-jones')

    # Senator's position first, then the MP's, matching the creation order
    # the fixtures have always used.
    for holder, title in ((self.senator, senator_title), (self.mp, mp_title)):
        Position.objects.create(
            person=holder,
            place=place,
            category='political',
            title=title,
        )
def test_sitting_naming(self):
    """Verify str(sitting) for each combination of start/end date and time."""
    sitting = Sitting(
        source=self.source,
        venue=self.venue,
        start_date=date(2011, 11, 15),
    )

    # Only a start date is set.
    self.assertEqual(str(sitting), 'National Assembly 2011-11-15')

    # Each step changes one attribute on the same sitting and states the
    # name expected afterwards; the steps are cumulative, so order matters.
    steps = (
        ('end_date', date(2011, 11, 15),
         'National Assembly 2011-11-15'),
        ('end_date', date(2011, 11, 16),
         'National Assembly 2011-11-15 to 2011-11-16'),
        ('start_time', time(13, 0),
         'National Assembly 2011-11-15 13:00 to 2011-11-16'),
        ('end_date', date(2011, 11, 15),
         'National Assembly 2011-11-15: 13:00'),
        ('end_time', time(18, 0),
         'National Assembly 2011-11-15: 13:00 to 18:00'),
        ('end_date', date(2011, 11, 16),
         'National Assembly 2011-11-15 13:00 to 2011-11-16 18:00'),
    )
    for attribute, value, expected_name in steps:
        setattr(sitting, attribute, value)
        self.assertEqual(str(sitting), expected_name)
def create_entries_from_data_and_source(cls, data, source):
    """Save a Sitting for ``source`` and one Entry per transcript line.

    ``data['meta']`` provides the venue slug (``'venue'``) and the optional
    ``'start_time'`` / ``'end_time'``; ``data['transcript']`` is walked in
    order and each line becomes an Entry numbered from 1.  When the source
    name contains 'Joint Sitting' and a matching Sitting already exists,
    nothing is created.  Always returns None.
    """
    meta = data['meta']
    venue = Venue.objects.get(slug=meta['venue'])
    start_time = meta.get('start_time')

    # Joint Sittings can be published by both Houses (identical documents),
    # so skip the source if the same Sitting has already been stored.
    if 'Joint Sitting' in source.name:
        duplicates = Sitting.objects.filter(
            venue=venue,
            source__name=source.name,
            start_date=source.date,
            start_time=start_time,
        )
        if duplicates.exists():
            # Parenthesised so the output is the same whether print is a
            # statement or a function.
            print("skipping duplicate source %s for %s" % (source.name, source.date))
            return None

    sitting = Sitting(
        source=source,
        venue=venue,
        start_date=source.date,
        start_time=start_time,
        end_date=source.date,
        end_time=meta.get('end_time'),
    )
    sitting.save()

    with transaction.commit_on_success():
        for position, line in enumerate(data['transcript'], 1):
            entry = Entry(
                sitting=sitting,
                type=line['type'],
                page_number=line['page_number'],
                text_counter=position,
                speaker_name=line.get('speaker_name', ''),
                speaker_title=line.get('speaker_title', ''),
                content=line['text'],
            )
            entry.save()

        # Stamp the source in the same transaction block as the entries.
        source.last_processing_success = datetime.datetime.now()
        source.save()

    return None
def create_entries_from_data_and_source(cls, data, source):
    """Create the Sitting described by ``data`` and its transcript entries.

    The venue is looked up by the slug in ``data['meta']['venue']``.  A
    source whose name contains 'Joint Sitting' is skipped when a Sitting
    with the same venue, source name, start date and start time is already
    stored.  Otherwise a Sitting is saved, then every item of
    ``data['transcript']`` is saved as an Entry with a 1-based counter, and
    the source's ``last_processing_success`` is set to the current time.
    Always returns None.
    """
    meta = data['meta']
    venue = Venue.objects.get(slug=meta['venue'])

    # Joint Sittings can be published by both Houses (identical documents);
    # the membership test short-circuits so the query only runs for them.
    already_stored = (
        'Joint Sitting' in source.name
        and Sitting.objects.filter(
            venue=venue,
            source__name=source.name,
            start_date=source.date,
            start_time=meta.get('start_time', None),
        ).exists()
    )
    if already_stored:
        message = "skipping duplicate source %s for %s" % (source.name, source.date)
        print(message)
        return None

    sitting = Sitting(
        source=source,
        venue=venue,
        start_date=source.date,
        start_time=meta.get('start_time', None),
        end_date=source.date,
        end_time=meta.get('end_time', None),
    )
    sitting.save()

    with transaction.commit_on_success():
        for text_counter, line in enumerate(data['transcript'], 1):
            Entry(
                sitting=sitting,
                type=line['type'],
                page_number=line['page_number'],
                text_counter=text_counter,
                speaker_name=line.get('speaker_name', ''),
                speaker_title=line.get('speaker_title', ''),
                content=line['text'],
            ).save()

        # Record the processing time alongside the entries.
        source.last_processing_success = datetime.datetime.now()
        source.save()

    return None
def test_sitting_naming(self):
    """Check the string form of a sitting as dates and times are filled in."""
    first_day = date(2011, 11, 15)
    second_day = date(2011, 11, 16)

    sitting = Sitting(source=self.source, venue=self.venue, start_date=first_day)

    def assert_named(expected):
        # Compare against the sitting's current state at call time.
        self.assertEqual(str(sitting), expected)

    # Start date only.
    assert_named('National Assembly 2011-11-15')

    # End date equal to the start date.
    sitting.end_date = first_day
    assert_named('National Assembly 2011-11-15')

    # End date on the following day.
    sitting.end_date = second_day
    assert_named('National Assembly 2011-11-15 to 2011-11-16')

    # Start time added, still spanning two days.
    sitting.start_time = time(13, 0)
    assert_named('National Assembly 2011-11-15 13:00 to 2011-11-16')

    # Start time with a single-day sitting.
    sitting.end_date = first_day
    assert_named('National Assembly 2011-11-15: 13:00')

    # Start and end time on a single day.
    sitting.end_time = time(18, 0)
    assert_named('National Assembly 2011-11-15: 13:00 to 18:00')

    # Start and end time across two days.
    sitting.end_date = second_day
    assert_named('National Assembly 2011-11-15 13:00 to 2011-11-16 18:00')
def create_entries_from_data_and_source(cls, data, source):
    """Save a Sitting for ``source`` and an Entry for each transcript line.

    ``data['meta']`` gives the venue slug and the optional start and end
    times; the sitting starts and ends on ``source.date``.  Entries are
    numbered from 1 in transcript order.  Afterwards the source's
    ``last_processing_success`` is set to the current time and the source
    is saved.  Always returns None.
    """
    meta = data['meta']

    sitting = Sitting(
        source=source,
        venue=Venue.objects.get(slug=meta['venue']),
        start_date=source.date,
        start_time=meta.get('start_time'),
        end_date=source.date,
        end_time=meta.get('end_time'),
    )
    sitting.save()

    with transaction.commit_on_success():
        for number, item in enumerate(data['transcript'], 1):
            entry = Entry(
                sitting=sitting,
                type=item['type'],
                page_number=item['page_number'],
                text_counter=number,
                speaker_name=item.get('speaker_name', ''),
                speaker_title=item.get('speaker_title', ''),
                content=item['text'],
            )
            entry.save()

        # Stamp the source in the same transaction block as the entries.
        source.last_processing_success = datetime.datetime.now()
        source.save()

    return None
def test_possible_matching_speakers(self):
    """Exercise Entry.possible_matching_speakers with both name algorithms.

    Two people with overlapping names each hold a political 'Member of
    Parliament' position; the entry's speaker name is then varied and the
    returned speakers compared for NAME_SUBSTRING_MATCH and
    NAME_SET_INTERSECTION_MATCH.
    """
    # The source, venue, sitting and entry are built in memory and never saved.
    source = Source(
        name='Test source',
        url='http://example.com/foo/bar/testing',
        date=datetime.date(2011, 1, 3),
    )
    venue = Venue(slug='test-venue', name='Test Venue')
    sitting = Sitting(
        start_date=datetime.date(2011, 1, 2),
        source=source,
        venue=venue,
    )
    entry = Entry(sitting=sitting)

    james_smith = Person.objects.create(
        legal_name='James Smith',
        slug='james-smith',
    )
    james_smith2 = Person.objects.create(
        title='Mr',
        legal_name='Bob Smith James',
        slug='james-smith2',
    )

    mp = PositionTitle.objects.create(name='Member of Parliament', slug='mp')

    # Give both people the same open-ended position, in a fixed order.
    for holder in (james_smith, james_smith2):
        Position.objects.create(
            person=holder,
            title=mp,
            start_date=ApproximateDate(year=2011, month=1, day=1),
            end_date=ApproximateDate(future=True),
            category='political',
        )

    def matches(algorithm):
        # Evaluated against whatever speaker_name the entry has right now.
        return entry.possible_matching_speakers(name_matching_algorithm=algorithm)

    entry.speaker_name = 'James Smith'
    self.assertListEqual(list(matches(NAME_SUBSTRING_MATCH)), [james_smith])

    entry.speaker_name = 'Mr Smith'
    self.assertItemsEqual(
        matches(NAME_SUBSTRING_MATCH),
        (james_smith, james_smith2),
    )
    self.assertListEqual(
        list(matches(NAME_SET_INTERSECTION_MATCH)),
        [james_smith2],
    )

    entry.speaker_name = 'Mr James Smith'
    self.assertListEqual(list(matches(NAME_SUBSTRING_MATCH)), [james_smith])
    self.assertListEqual(
        list(matches(NAME_SET_INTERSECTION_MATCH)),
        [james_smith2, james_smith],
    )