Ejemplo n.º 1
0
 def test_capitalize_roman_numeral_suffixes(self):
     """Roman-numeral suffixes (II, IV, IX) come out upper-cased.

     Exercises both cleaver classes with "First Last" and "Last, First"
     inputs, including a suffix that trails the first name.
     """
     individual_cases = (
         ('Ken Cuccinelli II', 'KEN CUCCINELLI II'),
         ('Ken Cuccinelli II', 'CUCCINELLI II, KEN'),
         ('Ken Cuccinelli IV', 'CUCCINELLI IV, KEN'),
         ('Ken Cuccinelli IX', 'CUCCINELLI IX, KEN'),
     )
     for expected, raw in individual_cases:
         cleaned = IndividualNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))

     politician_cases = (
         ('Ken T Cuccinelli II', 'CUCCINELLI II, KEN T'),
         ('Ken T Cuccinelli II', 'CUCCINELLI, KEN T II'),
     )
     for expected, raw in politician_cases:
         cleaned = PoliticianNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
    def get_votesmart_id(self, candidates, name, state, district, seat):
        """Try to resolve the VoteSmart candidate id for a legislator.

        Candidates are first narrowed to those whose ``electionDistrictName``
        is the given district or 'At-Large', then matched on last name
        (optionally prefixed by the parsed middle name, joined by a space or
        a hyphen) and on first name (against the candidate's first, preferred
        or nick name).  All comparisons are case-insensitive.

        Returns the ``candidateId`` when exactly one candidate matches.
        Otherwise returns None and records the parsed name together with the
        district-level candidate pool in ``self.too_many`` or
        ``self.no_match``.  ``state`` and ``seat`` are accepted but unused.
        """
        accepted_districts = [str(district), 'At-Large']
        in_district = [
            cand for cand in candidates
            if cand.electionDistrictName in accepted_districts
        ]

        parsed = PoliticianNameCleaver(name).parse()
        if isinstance(parsed, RunningMatesNames):
            # Only the first mate (the governor) is looked up for a ticket.
            parsed = parsed.mates()[0]

        def last_name_matches(cand):
            # Accept "Last", "Middle Last" and "Middle-Last" spellings.
            theirs = cand.lastName.lower()
            ours = parsed.last.lower()
            if theirs == ours:
                return True
            if not parsed.middle:
                return False
            middle = parsed.middle.lower()
            return (theirs == ' '.join([middle, ours])
                    or theirs == '-'.join([middle, ours]))

        def first_name_matches(cand):
            known_first_names = [
                cand.firstName.lower(),
                cand.preferredName.lower(),
                cand.nickName.lower(),
            ]
            return parsed.first.lower() in known_first_names

        # The last name is tested first so the first-name fields are only
        # touched for candidates whose last name already matched.
        matches = [
            cand for cand in in_district
            if last_name_matches(cand) and first_name_matches(cand)
        ]

        if len(matches) == 1:
            return matches[0].candidateId

        # NOTE(review): the report lists the whole district pool rather than
        # only the name matches, even in the "too many" case -- confirm that
        # this is intended.
        report = [
            (parsed.first, parsed.middle, parsed.last),
            [(cand.firstName, cand.preferredName, cand.nickName, cand.lastName)
             for cand in in_district],
        ]
        if len(matches) > 1:
            self.too_many.append(report)
        else:
            self.no_match.append(report)
        return None
Ejemplo n.º 3
0
 def test_we_dont_need_no_steeenking_nicknames(self):
     """Parenthesised or quoted nicknames are dropped from the cleaned name."""
     cases = (
         ('Robert M McDonnell', 'McDonnell, Robert M (Bob)'),
         ('John J Duncan Jr', 'John J (Jimmy) Duncan Jr (R)'),
         ('Christopher Bond', 'Christopher "Kit" Bond'),
     )
     for expected, raw in cases:
         cleaned = PoliticianNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
Ejemplo n.º 4
0
    def test_parse_safe__politician(self):
        """Unparseable input raises by default and is echoed back with safe=True."""
        unparseable_inputs = (
            'BARDEN PHD J D, R CHRISTOPHER',
            'gobbledy gook bah bah bloop!!!.p,.lcrg%%% #$<',
        )
        for raw in unparseable_inputs:
            # Strict mode: the cleaver must refuse the name.
            with self.assertRaises(UnparseableNameException):
                PoliticianNameCleaver(raw).parse()

            # Safe mode: the original string comes back unchanged.
            echoed = PoliticianNameCleaver(raw).parse(safe=True)
            self.assertEqual(raw, str(echoed))
Ejemplo n.º 5
0
def earmarks_table_data(entity_id, cycle):
    """Fetch a politician's earmarks and tidy the member names in place.

    Each member's ``name`` is replaced by the parsed name.  When the parse
    result is a ``PoliticianName`` it is rendered as a string with the
    member's party and state appended; any other parse result is stored
    as returned by the cleaver.
    """
    earmarks = api.pol.earmarks(entity_id, cycle)
    for earmark in earmarks:
        for sponsor in earmark['members']:
            parsed = PoliticianNameCleaver(sponsor['name']).parse()
            if not isinstance(parsed, PoliticianName):
                sponsor['name'] = parsed
                continue
            labelled = parsed.plus_metadata(sponsor['party'], sponsor['state'])
            sponsor['name'] = str(labelled)

    return earmarks
Ejemplo n.º 6
0
def earmarks_table_data(entity_id, cycle):
    """Return the earmark rows for an entity with cleaned member names.

    Every ``member['name']`` in every row is overwritten: a parsed
    ``PoliticianName`` becomes its string form with party and state
    metadata added, and any other parse result is stored unchanged.
    """
    table_rows = api.pol.earmarks(entity_id, cycle)
    for table_row in table_rows:
        for member in table_row['members']:
            cleaned = PoliticianNameCleaver(member['name']).parse()
            is_single_politician = isinstance(cleaned, PoliticianName)
            if is_single_politician:
                with_metadata = cleaned.plus_metadata(
                    member['party'], member['state'])
                member['name'] = str(with_metadata)
            else:
                member['name'] = cleaned

    return table_rows
    def handle(self, *args, **options):
        """Find VoteSmart ids for politicians that have none recorded yet.

        Selects governors and federal house/senate politicians from
        ``politician_metadata_latest_cycle_view`` (joined to
        ``matchbox_entity``) that have no ``matchbox_votesmartinfo`` row,
        parses each name and hands it to ``process_politician`` -- once per
        running mate when the name parses as a ticket.  Afterwards
        ``self.too_many`` and ``self.no_match`` are pretty-printed into two
        report files.
        """
        self.log.info("Starting...")

        #candidates = self.get_all_congressional_candidates()
        #self.candidates = self.filter_candidates(candidates)

        cursor = connection.cursor()

        # get count
        # NOTE(review): the count is fetched but never used afterwards.
        cursor.execute("select count(*) from politician_metadata_latest_cycle_view")
        cursor.fetchone()
        transaction.rollback()

        select_sql = """
            select entity_id, name, state, district, seat, cycle
            from politician_metadata_latest_cycle_view m
            inner join matchbox_entity e on e.id = m.entity_id
            where
                entity_id not in (select entity_id from matchbox_votesmartinfo)
                and seat in ('state:governor', 'federal:house', 'federal:senate')
            order by entity_id
        """

        self.log.debug(select_sql)
        cursor.execute(select_sql)
        politicians = cursor.fetchall()
        transaction.rollback()

        self.log.info("{0} federal politicians located to find VoteSmart ids for".format(len(politicians)))

        # Reset existing data
        #cursor.execute("delete from matchbox_votesmartinfo")

        for (entity_id, name, state, district, seat, cycle) in politicians:
            name_obj = PoliticianNameCleaver(name).parse()
            if isinstance(name_obj, RunningMatesNames):
                # A ticket is processed once per mate under the same entity
                # id; an IntegrityError for one mate is skipped so the other
                # mate is still processed.
                for mate in name_obj.mates():
                    try:
                        self.process_politician(cursor, entity_id, name, state, district, seat, cycle, mate)
                    except django.db.utils.IntegrityError:
                        continue
            else:
                self.process_politician(cursor, entity_id, name, state, district, seat, cycle, name_obj)

        self.log.info("Done.")

        # NOTE(review): the report paths are hard-coded to one developer's
        # home directory -- consider making them configurable.
        # ``with`` guarantees the files are closed even if formatting or
        # writing raises.
        self.log.info("Names with too many matches:")
        with open("/home/akr/work/datacommons/too_many_matches.txt", "w") as too_many_file:
            too_many_file.write(self.pp.pformat(self.too_many))

        self.log.info("Names with no matches:")
        with open("/home/akr/work/datacommons/no_match.txt", "w") as no_match_file:
            no_match_file.write(self.pp.pformat(self.no_match))
Ejemplo n.º 8
0
 def test_running_mates_with_metadata(self):
     """A two-person ticket renders as "A & B (party-state)" with metadata."""
     ticket = PoliticianNameCleaver('STRICKLAND, TED & FISCHER, LEE').parse()
     labelled = ticket.plus_metadata('D', 'OH')
     self.assertEqual('Ted Strickland & Lee Fischer (D-OH)', str(labelled))
Ejemplo n.º 9
0
    def read(self, request, **kwargs):
        """Run the parent ``read`` and decorate each row for display.

        The ``name`` GET parameter (default '') is forwarded to the parent
        handler.  Every returned row then gains standardized lobbyist, firm
        and recipient names, a slug for each of them, and a comma-grouped
        ``total_amount_standardized``.
        """
        kwargs['name'] = request.GET.get('name', '')

        rows = super(DetailExplorerHandler, self).read(request, **kwargs)

        from name_cleaver import OrganizationNameCleaver, IndividualNameCleaver, PoliticianNameCleaver
        from django.contrib.humanize.templatetags.humanize import intcomma
        from django.template.defaultfilters import slugify

        for row in rows:
            # Empty lobbyist and firm names are passed through unparsed.
            lobbyist = row['lobbyist_name']
            if lobbyist:
                lobbyist = IndividualNameCleaver(lobbyist).parse()
            row['lobbyist_name_standardized'] = lobbyist
            row['lobbyist_name_slug'] = slugify(lobbyist)

            firm = row['firm_name']
            if firm:
                firm = OrganizationNameCleaver(firm).parse()
            row['firm_name_standardized'] = firm
            row['firm_name_slug'] = slugify(firm)

            # Recipients with an id are cleaned as politicians, the rest as
            # organizations.
            if row['recipient_id']:
                recipient_cleaver = PoliticianNameCleaver
            else:
                recipient_cleaver = OrganizationNameCleaver
            recipient = recipient_cleaver(row['recipient_name']).parse()
            row['recipient_name_standardized'] = recipient
            row['recipient_name_slug'] = slugify(recipient)

            row['total_amount_standardized'] = intcomma(row['total_amount'])
        return rows
Ejemplo n.º 10
0
    def handle(self, *args, **options):
        """Create name-part records for politician aliases.

        Works through aliases of 'politician' entities that carry a
        'urn:crp:recipient' attribute, excluding those whose ``name_parts``
        is non-null.  Each alias is parsed and its first/middle/last/suffix
        are stored via ``EntityNameParts.objects.get_or_create``.  With
        DEBUG on, the alias and parsed name are printed; otherwise one dot
        is written per alias.
        """
        pending_aliases = EntityAlias.objects.filter(
            entity__type='politician',
            entity__attributes__namespace='urn:crp:recipient',
        ).exclude(name_parts__isnull=False)

        # Single-argument print(...) behaves the same as the print statement.
        print(pending_aliases.query)
        print('----------------------------------------------\n\n')

        for alias in pending_aliases:
            if DEBUG:
                print(alias.alias)
                print(alias.id)

            parsed = PoliticianNameCleaver(alias.alias).parse()

            if DEBUG:
                print(str(parsed))
                print('--------------------')

            # NOTE(review): assumes the parse result exposes
            # first/middle/last/suffix -- confirm that a running-mates
            # result cannot occur here.
            EntityNameParts.objects.get_or_create(
                alias=alias,
                first=parsed.first,
                middle=parsed.middle,
                last=parsed.last,
                suffix=parsed.suffix,
            )

            if not DEBUG:
                sys.stdout.write('.')
Ejemplo n.º 11
0
def bioguide_redirect(request, **kwargs):
    """Redirect a congressional bioguide id to the matching entity.

    Looks up the entity id for ``kwargs['bioguide_id']``, takes the first
    lookup result, slugifies the cleaned entity name and delegates to
    ``entity_redirect``.
    """
    lookup_results = api.entities.id_lookup(
        namespace='urn:sunlight:congressional_bioguide',
        id=kwargs['bioguide_id'])
    entity_id = lookup_results[0]['id']

    entity = api.entities.metadata(entity_id)
    cleaned_name = PoliticianNameCleaver(entity['name']).parse()
    entity_name = slugify(cleaned_name.name_str())

    return entity_redirect(request, entity_id, entity_name)
Ejemplo n.º 12
0
def normalize_politician(alias):
    """Return the lower-cased name permutations for a politician alias.

    The alias is parsed with ``PoliticianNameCleaver``.  For a running-mates
    result the permutations of the first mate are followed by those of the
    second mate; otherwise the permutations of the single parsed name are
    returned.  Every permutation is lower-cased in both cases (previously
    the running-mates branch returned them with their original casing).
    """
    parts = PoliticianNameCleaver(alias).parse()

    if isinstance(parts, RunningMatesNames):
        people = [parts.mate1, parts.mate2]
    else:
        people = [parts]

    return [
        s.lower()
        for person in people
        for s in get_name_permutations(person)
    ]
Ejemplo n.º 13
0
 def test_with_metadata(self):
     """plus_metadata appends "(party-state)", omitting whichever part is empty."""
     cases = (
         ('Charles Schumer (D-NY)', 'Charles Schumer', 'D', 'NY'),
         ('Barack Obama (D)', 'Barack Obama', 'D', ''),
         ('Charles Schumer (NY)', 'Charles Schumer', '', 'NY'),
         # only this one guy is missing both at the moment
         ('Jerry Leon Carroll', 'Jerry Leon Carroll', '', ''),
     )
     for expected, raw, party, state in cases:
         parsed = PoliticianNameCleaver(raw).parse()
         labelled = parsed.plus_metadata(party, state)
         self.assertEqual(expected, str(labelled))
Ejemplo n.º 14
0
    def handle(self, *args, **options):
        """Match members without a standardized name against entities.

        For each distinct (raw_name, chamber, state) of members whose
        ``standardized_name`` is empty, the raw name is parsed and an entity
        query is built.  If the member has no state or the query finds
        nothing, the query is retried against the set of states derived from
        the member's earmarks.  The member is then updated, and the outcome
        is tallied in ``successes``, ``failures_no_match`` or
        ``failures_too_many``.
        """
        self.successes = 0
        self.failures_no_match = 0
        self.failures_too_many = 0

        unresolved = Member.objects.filter(standardized_name='').values(
            'raw_name', 'chamber', 'state').distinct()

        for member in unresolved:
            name_obj = PoliticianNameCleaver(member['raw_name']).parse()
            state_possibilities = self.get_set_of_states_from_earmark(member)
            kwargs = self.build_query_kwargs(member, chamber_map, name_obj)
            entities = self.entity_query_set(name_obj, kwargs)

            # state might be wrong, so try the whole list of states
            if (not member.get('state')) or entities.count() == 0:
                kwargs.pop('politician_metadata_for_latest_cycle__state', None)
                kwargs['politician_metadata_for_latest_cycle__state__in'] = state_possibilities
                entities = self.entity_query_set(name_obj, kwargs)

            # main decision block
            match_count = len(entities)
            if match_count == 0:
                self.failures_no_match += 1
                self.update_member(member, name_obj)

                if DEBUG:
                    self.print_member(member, state_possibilities)
                    print("- No match!")

            elif match_count > 1:
                self.update_member(member, name_obj)
                # In DEBUG mode the operator is prompted instead of the
                # failure being counted.
                if DEBUG:
                    self.prompt_on_too_many_matches(
                        member, state_possibilities, entities)
                else:
                    self.failures_too_many += 1

            else:
                # Exactly one entity matched.
                self.update_member(member, name_obj, entities[0])
                self.successes += 1
Ejemplo n.º 15
0
    def update_member(self, member, name_obj, entity=None):
        """Store the standardized name (and CRP id, if known) on matching members.

        With an ``entity`` the CRP id comes from its 'urn:crp:recipient'
        attribute and the name from the entity's own cleaned name; without
        one the CRP id is '' and the name is ``str(name_obj)``.  All
        ``Member`` rows sharing the member's raw_name, chamber and state are
        updated.  When an entity was supplied, the member and the entity's
        state are printed.
        """
        if entity:
            crp_attribute = entity.attributes.get(namespace='urn:crp:recipient')
            crp_id = crp_attribute.value
            name = str(PoliticianNameCleaver(entity.name).parse())
        else:
            crp_id = ''
            name = str(name_obj)

        same_member_rows = Member.objects.filter(
            raw_name=member.get('raw_name'),
            chamber=member.get('chamber'),
            state=member.get('state'),
        )
        same_member_rows.update(
            crp_id=crp_id,
            standardized_name=name,
        )

        if entity:
            self.print_member(member)
            matched_state = entity.politician_metadata_for_latest_cycle.state
            print('- Updated for state {0}!'.format(matched_state))
Ejemplo n.º 16
0
 def test_doesnt_misinterpret_roman_numeral_characters_in_last_name_as_suffix(
         self):
     """A last name such as 'Vickers' is kept whole as the last name."""
     parsed = PoliticianNameCleaver('Audrey C Vickers').parse()
     self.assertEqual('Vickers', parsed.last)
Ejemplo n.º 17
0
 def test_deals_with_last_names_that_look_like_two_part_but_are_not(self):
     """'Quoc Van (D)' parses to first name 'Quoc' and last name 'Van'."""
     parsed = PoliticianNameCleaver('Quoc Van (D)').parse()
     expected_parts = (('first', 'Quoc'), ('last', 'Van'))
     for attr, expected in expected_parts:
         self.assertEqual(expected, getattr(parsed, attr))
Ejemplo n.º 18
0
 def test_name_with_two_part_last_name(self):
     """Two-word last names ('La Mere', 'Di Souza') are kept together."""
     cases = (
         ('La Mere', 'Albert J La Mere'),
         ('Di Souza', 'Dinesh Di Souza'),
     )
     for expected_last, raw in cases:
         parsed = PoliticianNameCleaver(raw).parse()
         self.assertEqual(expected_last, parsed.last)
Ejemplo n.º 19
0
 def test_case_converts_in_non_mixed_case_names_only(self):
     """A name that is already mixed-case is returned with its casing intact."""
     raw = 'Antonio dAlesio'
     cleaned = PoliticianNameCleaver(raw).parse()
     self.assertEqual('Antonio dAlesio', str(cleaned))
Ejemplo n.º 20
0
 def test_edgar_de_lisle_ross(self):
     """"Edgar de L'Isle Ross (R)" splits into first, middle and last; no suffix."""
     parsed = PoliticianNameCleaver("Edgar de L'Isle Ross (R)").parse()
     expected_parts = (
         ('first', 'Edgar'),
         ('middle', "de L'Isle"),
         ('last', 'Ross'),
         ('suffix', None),
     )
     for attr, expected in expected_parts:
         self.assertEqual(expected, getattr(parsed, attr))
Ejemplo n.º 21
0
 def test_standardize_running_mate_names(self):
     """Two "Last, First" names joined by '&' become "First Last & First Last"."""
     ticket = PoliticianNameCleaver('Kasich, John & Taylor, Mary').parse()
     self.assertEqual('John Kasich & Mary Taylor', str(ticket))
Ejemplo n.º 22
0
 def test_names_with_weird_parenthetical_stuff(self):
     """A trailing parenthetical such as '(COMMITTEE 1)' is stripped."""
     cleaned = PoliticianNameCleaver('SWANN, LYNN (COMMITTEE 1)').parse()
     self.assertEqual('Lynn Swann', str(cleaned))
Ejemplo n.º 23
0
 def test_pile_it_on_two(self):
     """Middle name, doubled space, roman suffix and party marker together."""
     raw = 'William Steve Southerland  II (R)'
     cleaned = PoliticianNameCleaver(raw).parse()
     self.assertEqual('William Steve Southerland II', str(cleaned))
Ejemplo n.º 24
0
 def test_pile_it_on(self):
     """Quoted nickname, comma before the suffix and a parenthetical together."""
     raw = 'Milton Elmer "Mac" McCullough, Jr (3)'
     cleaned = PoliticianNameCleaver(raw).parse()
     self.assertEqual('Milton Elmer McCullough Jr', str(cleaned))
Ejemplo n.º 25
0
 def test_last_first(self):
     """A "Last, First" input is reordered to "First Last"."""
     cleaned = PoliticianNameCleaver('Gore, Albert').parse()
     self.assertEqual('Albert Gore', str(cleaned))
Ejemplo n.º 26
0
 def test_not_everything_is_a_scot(self):
     """Names that merely contain 'mack' get plain title-casing."""
     cases = (
         ('Adam Mack', 'ADAM MACK'),
         ('Don Womackey', 'DON WOMACKEY'),
     )
     for expected, raw in cases:
         cleaned = PoliticianNameCleaver(raw).parse()
         self.assertEqual(expected, str(cleaned))
Ejemplo n.º 27
0
 def test_first_last_mixed_case_with_party(self):
     """A trailing party marker such as '(D)' is removed from the name."""
     cleaned = PoliticianNameCleaver('Nancy Pelosi (D)').parse()
     self.assertEqual('Nancy Pelosi', str(cleaned))
Ejemplo n.º 28
0
 def test_last_first_mixed_case_scot_with_party(self):
     """A mixed-case 'MacDonald' keeps its casing when reordered and stripped of '(R)'."""
     cleaned = PoliticianNameCleaver('MacDonald, Emory (R)').parse()
     self.assertEqual('Emory MacDonald', str(cleaned))
Ejemplo n.º 29
0
 def test_pile_it_on_three(self):
     """Apostrophe last name, comma before the suffix and party marker together."""
     raw = "Edward Thomas O'Donnell, Jr (D)"
     cleaned = PoliticianNameCleaver(raw).parse()
     self.assertEqual("Edward Thomas O'Donnell Jr", str(cleaned))
Ejemplo n.º 30
0
 def test_capitalize_irish_names(self):
     """An all-caps O'-name is capitalised after the apostrophe as well."""
     cleaned = PoliticianNameCleaver("SEAN O'LEARY").parse()
     self.assertEqual("Sean O'Leary", str(cleaned))
Ejemplo n.º 31
0
 def test_handles_empty_names(self):
     """Parsing an empty string yields a name whose string form is empty."""
     cleaned = PoliticianNameCleaver('').parse()
     self.assertEqual('', str(cleaned))