def test_capitalize_roman_numeral_suffixes(self):
    """Roman-numeral suffixes stay upper-case while the rest is title-cased."""
    cases = (
        (IndividualNameCleaver, 'KEN CUCCINELLI II', 'Ken Cuccinelli II'),
        (IndividualNameCleaver, 'CUCCINELLI II, KEN', 'Ken Cuccinelli II'),
        (IndividualNameCleaver, 'CUCCINELLI IV, KEN', 'Ken Cuccinelli IV'),
        (IndividualNameCleaver, 'CUCCINELLI IX, KEN', 'Ken Cuccinelli IX'),
        (PoliticianNameCleaver, 'CUCCINELLI II, KEN T', 'Ken T Cuccinelli II'),
        (PoliticianNameCleaver, 'CUCCINELLI, KEN T II', 'Ken T Cuccinelli II'),
    )
    for cleaver_cls, raw, expected in cases:
        self.assertEqual(expected, str(cleaver_cls(raw).parse()))
def test_we_dont_need_no_steeenking_nicknames(self):
    """Parenthesised or quoted nicknames are dropped from the cleaned name."""
    cases = (
        ('Robert M McDonnell', 'McDonnell, Robert M (Bob)'),
        ('John J Duncan Jr', 'John J (Jimmy) Duncan Jr (R)'),
        ('Christopher Bond', 'Christopher "Kit" Bond'),
    )
    for expected, raw in cases:
        cleaned = str(PoliticianNameCleaver(raw).parse())
        self.assertEqual(expected, cleaned)
def test_parse_safe__politician(self):
    """Unparseable input raises by default and is echoed back with safe=True."""
    unparseable_names = (
        'BARDEN PHD J D, R CHRISTOPHER',
        'gobbledy gook bah bah bloop!!!.p,.lcrg%%% #$<',
    )
    for raw in unparseable_names:
        # Default mode: the cleaver refuses the name outright.
        with self.assertRaises(UnparseableNameException):
            PoliticianNameCleaver(raw).parse()

        # Safe mode: the original string comes back unchanged.
        safely_parsed = PoliticianNameCleaver(raw).parse(safe=True)
        self.assertEqual(raw, str(safely_parsed))
def test_running_mates_with_metadata(self):
    """Party/state metadata is appended once after both running mates."""
    ticket = PoliticianNameCleaver('STRICKLAND, TED & FISCHER, LEE').parse()
    labelled = ticket.plus_metadata('D', 'OH')
    self.assertEqual('Ted Strickland & Lee Fischer (D-OH)', str(labelled))
def read(self, request, **kwargs):
    """Run the parent handler's read and decorate each row for display.

    The ``name`` GET parameter (default ``''``) is forwarded to the parent
    handler. Every returned row then gains standardized names, slugs of
    those names, and a comma-formatted total amount.
    """
    kwargs['name'] = request.GET.get('name', '')
    out = super(DetailExplorerHandler, self).read(request, **kwargs)

    from name_cleaver import OrganizationNameCleaver, IndividualNameCleaver, PoliticianNameCleaver
    from django.contrib.humanize.templatetags.humanize import intcomma
    from django.template.defaultfilters import slugify

    for row in out:
        # Falsy lobbyist/firm names are passed through untouched.
        lobbyist = row['lobbyist_name']
        if lobbyist:
            lobbyist = IndividualNameCleaver(lobbyist).parse()
        row['lobbyist_name_standardized'] = lobbyist
        row['lobbyist_name_slug'] = slugify(lobbyist)

        firm = row['firm_name']
        if firm:
            firm = OrganizationNameCleaver(firm).parse()
        row['firm_name_standardized'] = firm
        row['firm_name_slug'] = slugify(firm)

        # Rows with a recipient id are cleaned as politicians, the rest as
        # organizations.
        if row['recipient_id']:
            recipient_cleaver = PoliticianNameCleaver
        else:
            recipient_cleaver = OrganizationNameCleaver
        recipient = recipient_cleaver(row['recipient_name']).parse()
        row['recipient_name_standardized'] = recipient
        row['recipient_name_slug'] = slugify(recipient)

        row['total_amount_standardized'] = intcomma(row['total_amount'])

    return out
def handle(self, *args, **options):
    """Create EntityNameParts rows for politician aliases that lack them.

    Selects aliases of politician entities carrying a
    ``urn:crp:recipient`` attribute, excluding those that already have
    name parts, parses each alias and stores the parsed parts via
    ``get_or_create``. With DEBUG on, each alias is printed; otherwise a
    dot per alias is written to stdout.
    """
    aliases = EntityAlias.objects.filter(
        entity__type='politician',
        entity__attributes__namespace='urn:crp:recipient',
    ).exclude(name_parts__isnull=False)

    print(aliases.query)
    print('----------------------------------------------\n\n')

    for alias in aliases:
        if DEBUG:
            print(alias.alias)
            print(alias.id)

        parsed = PoliticianNameCleaver(alias.alias).parse()

        if DEBUG:
            print(str(parsed))
            print('--------------------')

        # The (object, created) result is not needed here.
        EntityNameParts.objects.get_or_create(
            alias=alias,
            first=parsed.first,
            middle=parsed.middle,
            last=parsed.last,
            suffix=parsed.suffix,
        )

        if not DEBUG:
            sys.stdout.write('.')
def get_votesmart_id(self, candidates, name, state, district, seat):
    """Attempt to determine the votesmart_id of this legislator, or return None.

    Candidates are first narrowed to those whose ``electionDistrictName``
    is the given district or ``'At-Large'``, then matched on last and
    first name. A unique match yields its ``candidateId``. Otherwise the
    parsed name and the district-filtered candidates are recorded in
    ``self.too_many`` or ``self.no_match`` and None is returned.
    """
    possibilities = []
    for candidate in candidates:
        if candidate.electionDistrictName in [str(district), 'At-Large']:
            possibilities.append(candidate)

    name_obj = PoliticianNameCleaver(name).parse()
    if isinstance(name_obj, RunningMatesNames):
        # just use the governor, not lt. governor (this is the only case
        # where it's a list)
        name_obj = name_obj.mates()[0]

    name_possibilities = []
    for candidate in possibilities:
        candidate_last = candidate.lastName.lower()

        last_matches = candidate_last == name_obj.last.lower()
        if not last_matches and name_obj.middle:
            # The parsed middle name may really be the first half of a
            # two-part last name, joined by a space or a hyphen.
            middle_and_last = [name_obj.middle.lower(), name_obj.last.lower()]
            last_matches = candidate_last in (
                ' '.join(middle_and_last),
                '-'.join(middle_and_last),
            )
        if not last_matches:
            continue

        known_first_names = [
            candidate.firstName.lower(),
            candidate.preferredName.lower(),
            candidate.nickName.lower(),
        ]
        if name_obj.first.lower() in known_first_names:
            name_possibilities.append(candidate)

    if len(name_possibilities) == 1:
        return name_possibilities[0].candidateId

    # Ambiguous or missing: record what was searched for and what was available.
    report = [
        (name_obj.first, name_obj.middle, name_obj.last),
        [(c.firstName, c.preferredName, c.nickName, c.lastName) for c in possibilities],
    ]
    if len(name_possibilities) > 1:
        self.too_many.append(report)
    else:
        self.no_match.append(report)
def bioguide_redirect(request, **kwargs):
    """Redirect a bioguide id to the matching entity page.

    Looks up the entity for ``kwargs['bioguide_id']`` in the
    ``urn:sunlight:congressional_bioguide`` namespace, takes the first
    result, and redirects using a slug of the cleaned politician name.
    """
    matches = api.entities.id_lookup(
        namespace='urn:sunlight:congressional_bioguide',
        id=kwargs['bioguide_id'],
    )
    entity_id = matches[0]['id']

    entity = api.entities.metadata(entity_id)
    cleaned_name = PoliticianNameCleaver(entity['name']).parse()
    entity_name = slugify(cleaned_name.name_str())

    return entity_redirect(request, entity_id, entity_name)
def normalize_politician(alias):
    """Return the lower-cased name permutations for a politician alias.

    The alias is parsed with ``PoliticianNameCleaver``. For a running-mates
    ticket the permutations of both mates are combined; otherwise the
    permutations of the single parsed name are used. Both cases are
    lower-cased, so callers get the same normalization whether or not the
    alias was a ticket (previously the running-mates branch skipped the
    lower-casing).
    """
    parts = PoliticianNameCleaver(alias).parse()

    if isinstance(parts, RunningMatesNames):
        permutations = (get_name_permutations(parts.mate1)
                        + get_name_permutations(parts.mate2))
    else:
        permutations = get_name_permutations(parts)

    return [s.lower() for s in permutations]
def earmarks_table_data(entity_id, cycle):
    """Fetch a politician's earmark rows and clean each member's name in place.

    Names that parse to a ``PoliticianName`` are replaced by the string
    form of the name plus party/state metadata; any other parse result is
    stored as returned by the cleaver.
    """
    rows = api.pol.earmarks(entity_id, cycle)

    for row in rows:
        for member in row['members']:
            parsed = PoliticianNameCleaver(member['name']).parse()
            if not isinstance(parsed, PoliticianName):
                member['name'] = parsed
                continue
            labelled = parsed.plus_metadata(member['party'], member['state'])
            member['name'] = str(labelled)

    return rows
def handle(self, *args, **options):
    """Process politicians that have no VoteSmart info row yet.

    Selects governors and federal house/senate politicians that are absent
    from ``matchbox_votesmartinfo``, parses each name and hands it to
    ``self.process_politician``. For a running-mates ticket every mate is
    processed separately, and a mate that triggers an ``IntegrityError``
    is skipped. Finally ``self.too_many`` and ``self.no_match`` are
    pretty-printed to report files.
    """
    self.log.info("Starting...")

    cursor = connection.cursor()

    # get count (the result itself is not used)
    cursor.execute("select count(*) from politician_metadata_latest_cycle_view")
    cursor.fetchone()
    transaction.rollback()

    select_sql = (
        " select entity_id, name, state, district, seat, cycle"
        " from politician_metadata_latest_cycle_view m"
        " inner join matchbox_entity e on e.id = m.entity_id"
        " where entity_id not in (select entity_id from matchbox_votesmartinfo)"
        " and seat in ('state:governor', 'federal:house', 'federal:senate')"
        " order by entity_id "
    )
    self.log.debug(select_sql)
    cursor.execute(select_sql)
    politicians = cursor.fetchall()
    transaction.rollback()

    self.log.info("{0} federal politicians located to find VoteSmart ids for".format(len(politicians)))

    for (entity_id, name, state, district, seat, cycle) in politicians:
        name_obj = PoliticianNameCleaver(name).parse()

        if isinstance(name_obj, RunningMatesNames):
            for mate in name_obj.mates():
                try:
                    self.process_politician(cursor, entity_id, name, state, district, seat, cycle, mate)
                except django.db.utils.IntegrityError:
                    continue
        else:
            self.process_politician(cursor, entity_id, name, state, district, seat, cycle, name_obj)

    self.log.info("Done.")

    # Context managers close the report files even if formatting or
    # writing raises.
    self.log.info("Names with too many matches:")
    with open("/home/akr/work/datacommons/too_many_matches.txt", "w") as too_many_file:
        too_many_file.write(self.pp.pformat(self.too_many))

    self.log.info("Names with no matches:")
    with open("/home/akr/work/datacommons/no_match.txt", "w") as no_match_file:
        no_match_file.write(self.pp.pformat(self.no_match))
def test_with_metadata(self):
    """plus_metadata appends party and/or state, omitting whichever is empty."""
    cases = (
        ('Charles Schumer (D-NY)', 'Charles Schumer', 'D', 'NY'),
        ('Barack Obama (D)', 'Barack Obama', 'D', ''),
        ('Charles Schumer (NY)', 'Charles Schumer', '', 'NY'),
        # only this one guy is missing both at the moment
        ('Jerry Leon Carroll', 'Jerry Leon Carroll', '', ''),
    )
    for expected, raw, party, state in cases:
        labelled = PoliticianNameCleaver(raw).parse().plus_metadata(party, state)
        self.assertEqual(expected, str(labelled))
def handle(self, *args, **options):
    """Match earmark members that have no standardized name to entities.

    For each distinct (raw_name, chamber, state) with an empty
    ``standardized_name``, the name is parsed and entities are queried.
    When the member has no state or the first query finds nothing, the
    query is retried against every state from the member's earmarks. The
    member is then updated and the success / failure counters on ``self``
    are adjusted according to how many entities matched.
    """
    self.successes = 0
    self.failures_no_match = 0
    self.failures_too_many = 0

    pending_members = Member.objects.filter(standardized_name='').values(
        'raw_name', 'chamber', 'state').distinct()

    for member in pending_members:
        name_obj = PoliticianNameCleaver(member['raw_name']).parse()
        state_possibilities = self.get_set_of_states_from_earmark(member)
        kwargs = self.build_query_kwargs(member, chamber_map, name_obj)
        entities = self.entity_query_set(name_obj, kwargs)

        if (not member.get('state')) or entities.count() == 0:
            # state might be wrong, so try the whole list of states
            kwargs.pop('politician_metadata_for_latest_cycle__state', None)
            kwargs['politician_metadata_for_latest_cycle__state__in'] = state_possibilities
            entities = self.entity_query_set(name_obj, kwargs)

        # main decision block
        match_count = len(entities)
        if match_count == 0:
            self.failures_no_match += 1
            self.update_member(member, name_obj)
            if DEBUG:
                self.print_member(member, state_possibilities)
                print("- No match!")
        elif match_count > 1:
            self.update_member(member, name_obj)
            if DEBUG:
                self.prompt_on_too_many_matches(member, state_possibilities, entities)
            else:
                self.failures_too_many += 1
        else:
            # exactly one match
            self.update_member(member, name_obj, entities[0])
            self.successes += 1
def update_member(self, member, name_obj, entity=None):
    """Store the CRP id and standardized name on all matching Member rows.

    With an entity, the CRP id comes from its ``urn:crp:recipient``
    attribute and the name from cleaning ``entity.name``; the member and
    the matched state are also printed. Without one, the CRP id is ``''``
    and the name is ``str(name_obj)``.
    """
    if entity:
        crp_id = entity.attributes.get(namespace='urn:crp:recipient').value
        name = str(PoliticianNameCleaver(entity.name).parse())
    else:
        crp_id = ''
        name = str(name_obj)

    matching_members = Member.objects.filter(
        raw_name=member.get('raw_name'),
        chamber=member.get('chamber'),
        state=member.get('state'),
    )
    matching_members.update(crp_id=crp_id, standardized_name=name)

    if entity:
        self.print_member(member)
        print('- Updated for state {0}!'.format(
            entity.politician_metadata_for_latest_cycle.state))
def test_doesnt_misinterpret_roman_numeral_characters_in_last_name_as_suffix(self):
    """A last name made of roman-numeral letters is still the last name."""
    parsed = PoliticianNameCleaver('Audrey C Vickers').parse()
    self.assertEqual('Vickers', parsed.last)
def test_deals_with_last_names_that_look_like_two_part_but_are_not(self):
    """'Van' alone is a last name, not the prefix of a two-part last name."""
    parsed = PoliticianNameCleaver('Quoc Van (D)').parse()
    for attribute, expected in (('first', 'Quoc'), ('last', 'Van')):
        self.assertEqual(expected, getattr(parsed, attribute))
def test_name_with_two_part_last_name(self):
    """Two-part last names are kept together in ``last``."""
    cases = (
        ('La Mere', 'Albert J La Mere'),
        ('Di Souza', 'Dinesh Di Souza'),
    )
    for expected_last, raw in cases:
        parsed = PoliticianNameCleaver(raw).parse()
        self.assertEqual(expected_last, parsed.last)
def test_case_converts_in_non_mixed_case_names_only(self):
    """A name that is already mixed-case keeps its capitalization."""
    mixed_case_name = 'Antonio dAlesio'
    cleaned = str(PoliticianNameCleaver(mixed_case_name).parse())
    self.assertEqual('Antonio dAlesio', cleaned)
def test_edgar_de_lisle_ross(self):
    """A multi-word middle name with an apostrophe is parsed as the middle."""
    parsed = PoliticianNameCleaver("Edgar de L'Isle Ross (R)").parse()
    expected_parts = (
        ('first', 'Edgar'),
        ('middle', "de L'Isle"),
        ('last', 'Ross'),
        ('suffix', None),
    )
    for attribute, expected in expected_parts:
        self.assertEqual(expected, getattr(parsed, attribute))
def test_standardize_running_mate_names(self):
    """Both halves of a 'Last, First & Last, First' ticket are reordered."""
    ticket = PoliticianNameCleaver('Kasich, John & Taylor, Mary').parse()
    self.assertEqual('John Kasich & Mary Taylor', str(ticket))
def test_capitalize_irish_names(self):
    """The letter after the apostrophe in O'... names is capitalized."""
    cleaned = str(PoliticianNameCleaver("SEAN O'LEARY").parse())
    self.assertEqual("Sean O'Leary", cleaned)
def test_pile_it_on_two(self):
    """Middle name, roman-numeral suffix and party marker in one input."""
    raw = 'William Steve Southerland II (R)'
    cleaned = str(PoliticianNameCleaver(raw).parse())
    self.assertEqual('William Steve Southerland II', cleaned)
def test_pile_it_on(self):
    """Nickname, comma before the suffix and a parenthetical in one input."""
    raw = 'Milton Elmer "Mac" McCullough, Jr (3)'
    cleaned = str(PoliticianNameCleaver(raw).parse())
    self.assertEqual('Milton Elmer McCullough Jr', cleaned)
def test_last_first(self):
    """'Last, First' input is reordered to 'First Last'."""
    cleaned = str(PoliticianNameCleaver('Gore, Albert').parse())
    self.assertEqual('Albert Gore', cleaned)
def test_not_everything_is_a_scot(self):
    """'Mack' and an embedded 'mack' get no Mac-style inner capital."""
    cases = (
        ('Adam Mack', 'ADAM MACK'),
        ('Don Womackey', 'DON WOMACKEY'),
    )
    for expected, raw in cases:
        self.assertEqual(expected, str(PoliticianNameCleaver(raw).parse()))
def test_first_last_mixed_case_with_party(self):
    """A trailing party marker is stripped from a 'First Last' name."""
    cleaned = str(PoliticianNameCleaver('Nancy Pelosi (D)').parse())
    self.assertEqual('Nancy Pelosi', cleaned)
def test_names_with_weird_parenthetical_stuff(self):
    """An arbitrary trailing parenthetical is dropped from the name."""
    raw = 'SWANN, LYNN (COMMITTEE 1)'
    cleaned = str(PoliticianNameCleaver(raw).parse())
    self.assertEqual('Lynn Swann', cleaned)
def test_pile_it_on_three(self):
    """O'... last name, comma before the suffix and party marker together."""
    raw = "Edward Thomas O'Donnell, Jr (D)"
    cleaned = str(PoliticianNameCleaver(raw).parse())
    self.assertEqual("Edward Thomas O'Donnell Jr", cleaned)
def test_last_first_mixed_case_scot_with_party(self):
    """A mixed-case 'Mac' name keeps its capitals when reordered."""
    cleaned = str(PoliticianNameCleaver('MacDonald, Emory (R)').parse())
    self.assertEqual('Emory MacDonald', cleaned)
def test_handles_empty_names(self):
    """Parsing an empty string yields something whose str() is empty."""
    parsed_empty = PoliticianNameCleaver('').parse()
    self.assertEqual('', str(parsed_empty))