Ejemplo n.º 1
0
    def create_entries_from_data_and_source(cls, data, source):
        """Create the Sitting and its Entries for one parsed source document.

        ``data`` must provide ``data['meta']['venue']`` (a Venue slug) and
        ``data['transcript']``, an iterable of dicts with ``'type'``,
        ``'page_number'`` and ``'text'`` keys; ``'speaker_name'`` and
        ``'speaker_title'`` are optional and default to ``''``.
        ``data['meta']`` may also carry ``'start_time'`` and ``'end_time'``.

        ``source`` supplies ``name`` and ``date``; on success its
        ``last_processing_success`` is set to the current time and saved.

        Always returns None. A missing required key raises ``KeyError``;
        the venue is fetched with ``.get()``, so an unknown slug is
        presumably reported as ``Venue.DoesNotExist``.
        """
        meta = data['meta']
        venue = Venue.objects.get(slug=meta['venue'])
        start_time = meta.get('start_time', None)

        # Joint Sittings can be published by both Houses (identical
        # documents), so prevent the same Sitting being created twice.
        if 'Joint Sitting' in source.name and Sitting.objects.filter(
                venue=venue,
                source__name=source.name,
                start_date=source.date,
                start_time=start_time,
        ).exists():
            print("skipping duplicate source %s for %s" % (source.name,
                                                           source.date))
            return None

        # The Sitting is created inside the transaction together with its
        # entries: if any entry fails to save, no half-imported Sitting is
        # left behind (which, for Joint Sittings, would also make the
        # duplicate check above skip every later retry).
        with transaction.commit_on_success():
            sitting = Sitting(
                source=source,
                venue=venue,
                start_date=source.date,
                start_time=start_time,
                end_date=source.date,
                end_time=meta.get('end_time', None),
            )
            sitting.save()

            # text_counter is the 1-based position within the transcript.
            for counter, line in enumerate(data['transcript'], 1):
                Entry(
                    sitting=sitting,
                    type=line['type'],
                    page_number=line['page_number'],
                    text_counter=counter,
                    speaker_name=line.get('speaker_name', ''),
                    speaker_title=line.get('speaker_title', ''),
                    content=line['text'],
                ).save()

            source.last_processing_success = datetime.datetime.now()
            source.save()

        return None
Ejemplo n.º 2
0
    def test_multiple_politician_name_matches_senate(self):
        """An 'Hon. Jones' entry in the senate sitting matches only the senator.

        Uses substring name matching and expects exactly one candidate,
        equal to the ``self.senator`` fixture.
        """
        entry_fields = {
            'sitting': self.senate_sitting,
            'type': 'text',
            'page_number': 12,
            'text_counter': 4,
            'speaker_name': 'Jones',
            'speaker_title': 'Hon.',
            'content': 'test',
        }
        candidates = Entry(**entry_fields).possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH,
        )

        self.assertEqual(1, len(candidates))
        self.assertEqual(self.senator, candidates[0])
Ejemplo n.º 3
0
    def test_multiple_politician_name_matches_joint_sitting(self):
        """After renaming the source to a joint sitting, 'Jones' has two matches.

        The source fixture is renamed and saved first; the entry is then
        built against ``self.na_sitting`` (presumably backed by that
        source -- confirm in setUp) and matched by substring.
        """
        joint_source = self.source
        joint_source.name = "Joint Sitting of the Parliament"
        joint_source.save()

        entry_fields = {
            'sitting': self.na_sitting,
            'type': 'text',
            'page_number': 12,
            'text_counter': 4,
            'speaker_name': 'Jones',
            'speaker_title': 'Hon.',
            'content': 'test',
        }
        candidates = Entry(**entry_fields).possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH,
        )
        self.assertEqual(2, len(candidates))
Ejemplo n.º 4
0
 def test_alias_match_score(self):
     """Check alias_match_score for a table of (first, second, score) cases.

     Each case is evaluated on a fresh, empty Entry, in the order listed.
     """
     expected_scores = (
         ('Mr Bob Smith', 'Mr Bob Smith', 3),
         ('Mr Bob Smith', 'Mr Smith', 2),
         ('Mr Bob Smith', 'Bob Smith', 2),
         ('Mr Bob Smith', 'Bob', 1),
         ('Bob Smith', 'Smith, Bob', 2),
         ('Mr Bob Smith', 'Miss Alice Jones', 0),
     )
     for first, second, score in expected_scores:
         self.assertEqual(Entry().alias_match_score(first, second), score)
Ejemplo n.º 5
0
    def test_exclude_hidden_profiles(self):
        """With the senator marked hidden, 'Jones' matches only the MP fixture.

        The senator is flagged ``hidden`` and saved before a senate-sitting
        entry is matched by substring; exactly one candidate, ``self.mp``,
        is expected.
        """
        hidden_senator = self.senator
        hidden_senator.hidden = True
        hidden_senator.save()

        entry_fields = {
            'sitting': self.senate_sitting,
            'type': 'text',
            'page_number': 12,
            'text_counter': 4,
            'speaker_name': 'Jones',
            'speaker_title': 'Hon.',
            'content': 'test',
        }
        candidates = Entry(**entry_fields).possible_matching_speakers(
            name_matching_algorithm=NAME_SUBSTRING_MATCH,
        )

        self.assertEqual(1, len(candidates))
        self.assertEqual(self.mp, candidates[0])
Ejemplo n.º 6
0
    def create_entries_from_data_and_source(cls, data, source):
        """Create the Sitting and its Entries for one parsed source document.

        ``data`` must provide ``data['meta']['venue']`` (a Venue slug) and
        ``data['transcript']``, an iterable of dicts with ``'type'``,
        ``'page_number'`` and ``'text'`` keys; ``'speaker_name'`` and
        ``'speaker_title'`` are optional and default to ``''``.
        ``data['meta']`` may also carry ``'start_time'`` and ``'end_time'``.

        ``source`` supplies ``date``; on success its
        ``last_processing_success`` is set to the current time and saved.

        Always returns None. A missing required key raises ``KeyError``.
        """
        meta = data['meta']
        venue = Venue.objects.get(slug=meta['venue'])

        # The Sitting is created inside the transaction together with its
        # entries, so a failure part-way through the transcript does not
        # leave behind a Sitting that has no (or only some) entries.
        with transaction.commit_on_success():
            sitting = Sitting(
                source=source,
                venue=venue,
                start_date=source.date,
                start_time=meta.get('start_time', None),
                end_date=source.date,
                end_time=meta.get('end_time', None),
            )
            sitting.save()

            # text_counter is the 1-based position within the transcript.
            for counter, line in enumerate(data['transcript'], 1):
                Entry(
                    sitting=sitting,
                    type=line['type'],
                    page_number=line['page_number'],
                    text_counter=counter,
                    speaker_name=line.get('speaker_name', ''),
                    speaker_title=line.get('speaker_title', ''),
                    content=line['text'],
                ).save()

            source.last_processing_success = datetime.datetime.now()
            source.save()

        return None
Ejemplo n.º 7
0
    def test_possible_matching_speakers(self):
        """Exercise possible_matching_speakers with both matching algorithms.

        Two people are stored, each holding a 'political' Member of
        Parliament position starting 2011-01-01 with a future end date:
        'James Smith' and 'Mr' 'Bob Smith James'. The entry hangs off an
        unsaved sitting dated 2011-01-02; only its speaker_name is varied.
        """
        test_source = Source(
            name='Test source',
            url='http://example.com/foo/bar/testing',
            date=datetime.date(2011, 1, 3),
        )
        test_venue = Venue(slug='test-venue', name='Test Venue')
        test_sitting = Sitting(
            start_date=datetime.date(2011, 1, 2),
            source=test_source,
            venue=test_venue,
        )
        entry = Entry(sitting=test_sitting)

        james_smith = Person.objects.create(
            legal_name='James Smith', slug='james-smith')
        james_smith2 = Person.objects.create(
            title='Mr', legal_name='Bob Smith James', slug='james-smith2')

        mp_title = PositionTitle.objects.create(
            name='Member of Parliament', slug='mp')

        # Give both people the same current political position, in the
        # same order the people were created.
        for holder in (james_smith, james_smith2):
            Position.objects.create(
                person=holder,
                title=mp_title,
                start_date=ApproximateDate(year=2011, month=1, day=1),
                end_date=ApproximateDate(future=True),
                category='political',
            )

        def candidates(algorithm):
            # Match the entry's current speaker_name with the given algorithm.
            return entry.possible_matching_speakers(
                name_matching_algorithm=algorithm)

        entry.speaker_name = 'James Smith'
        self.assertListEqual(
            list(candidates(NAME_SUBSTRING_MATCH)), [james_smith])

        entry.speaker_name = 'Mr Smith'
        self.assertItemsEqual(
            candidates(NAME_SUBSTRING_MATCH), (james_smith, james_smith2))
        self.assertListEqual(
            list(candidates(NAME_SET_INTERSECTION_MATCH)), [james_smith2])

        entry.speaker_name = 'Mr James Smith'
        self.assertListEqual(
            list(candidates(NAME_SUBSTRING_MATCH)), [james_smith])
        # Order matters here: james_smith2 is expected ahead of james_smith.
        self.assertListEqual(
            list(candidates(NAME_SET_INTERSECTION_MATCH)),
            [james_smith2, james_smith])