def test_capitalize_roman_numeral_suffixes(self):
    # Roman-numeral suffixes (II, IV, IX) are expected to stay fully
    # upper-cased and to end up after the last name, regardless of where
    # they appear in the raw input.
    cases = (
        (IndividualNameCleaver, 'KEN CUCCINELLI II', 'Ken Cuccinelli II'),
        (IndividualNameCleaver, 'CUCCINELLI II, KEN', 'Ken Cuccinelli II'),
        (IndividualNameCleaver, 'CUCCINELLI IV, KEN', 'Ken Cuccinelli IV'),
        (IndividualNameCleaver, 'CUCCINELLI IX, KEN', 'Ken Cuccinelli IX'),
        (PoliticianNameCleaver, 'CUCCINELLI II, KEN T', 'Ken T Cuccinelli II'),
        (PoliticianNameCleaver, 'CUCCINELLI, KEN T II', 'Ken T Cuccinelli II'),
    )
    for cleaver_class, raw_name, expected in cases:
        self.assertEqual(expected, str(cleaver_class(raw_name).parse()))
def test_parse_safe__individual(self):
    # For each unparseable input: a plain parse() must raise, while
    # parse(safe=True) must hand back something whose str() is the
    # untouched input.
    unparseable_inputs = (
        'BARDEN PHD J D, R CHRISTOPHER',
        'gobbledy blah bloop!!!.p,.lcrg%%% #$<',
    )
    for raw_name in unparseable_inputs:
        with self.assertRaises(UnparseableNameException):
            IndividualNameCleaver(raw_name).parse()

        safely_parsed = IndividualNameCleaver(raw_name).parse(safe=True)
        self.assertEqual(raw_name, str(safely_parsed))
def test_nicknames_suffixes_and_honorifics(self):
    # The "Mr" honorific may lead or trail the given names; both inputs
    # must come out as the same cleaned name with nickname and suffix kept.
    expected = 'Frederick A "Tripp" Baird III'
    raw_names = (
        'Baird, Frederick A "Tripp" III Mr',
        'Baird, Mr Frederick A "Tripp" III',
    )
    for raw_name in raw_names:
        cleaned = str(IndividualNameCleaver(raw_name).parse())
        self.assertEqual(expected, cleaned)
def test_throw_out_mr(self):
    # "Mr" / "Mr." / "MR" must not survive into the cleaned name, whether
    # it leads the name or trails it.
    cases = (
        ('Mr T Boone Pickens', 'T Boone Pickens'),
        ('Mr. T Boone Pickens', 'T Boone Pickens'),
        ('Pickens, T Boone Mr', 'T Boone Pickens'),
        (' MR JOHN L NAU,', 'John L Nau'),
    )
    for raw_name, expected in cases:
        cleaned = str(IndividualNameCleaver(raw_name).parse())
        self.assertEqual(expected, cleaned)
def test_keep_the_mrs(self):
    # Unlike "Mr", a "Mrs" honorific is kept and rendered as a leading
    # "Mrs." in the cleaned name.
    cases = (
        ('Mrs T Boone Pickens', 'Mrs. T Boone Pickens'),
        ('Mrs. T Boone Pickens', 'Mrs. T Boone Pickens'),
        ('ROTHSCHILD 212, STANFORD Z MRS', 'Mrs. Stanford Z Rothschild'),
    )
    for raw_name, expected in cases:
        cleaned = str(IndividualNameCleaver(raw_name).parse())
        self.assertEqual(expected, cleaned)
def loopThroughNames(self,names):
    """Parse each raw name and look up those not already recorded.

    Every entry of ``names`` is stripped, logged and parsed with
    ``IndividualNameCleaver``.  The middle name is normalised (periods
    removed, or set to '' when missing).  If ``[last, first, middle]`` is
    already in ``self.existing`` the name is skipped; otherwise
    ``self.lookupName`` is called, followed by a 0.15 second pause.
    """
    for raw_name in names:
        cleaned_input = raw_name.strip()
        logging.info(cleaned_input)

        n = IndividualNameCleaver(cleaned_input).parse()
        n.middle = n.middle.replace('.', '') if n.middle else ''

        lookup_key = [n.last, n.first, n.middle]
        if lookup_key in self.existing:
            logging.info('skipping %s, already present' % cleaned_input)
        else:
            self.lookupName(n, cleaned_input)
            sleep(.15)
def read(self, request, **kwargs):
    """Run the parent handler's read() and decorate each returned row.

    The ``name`` GET parameter (default '') is forwarded to the parent as
    a keyword argument.  Each row then gains standardized and slugified
    variants of the lobbyist, firm and recipient names, plus a
    comma-grouped ``total_amount_standardized``.  Returns the decorated
    rows.
    """
    kwargs['name'] = request.GET.get('name', '')
    out = super(DetailExplorerHandler, self).read(request, **kwargs)

    from name_cleaver import OrganizationNameCleaver, IndividualNameCleaver, PoliticianNameCleaver
    from django.contrib.humanize.templatetags.humanize import intcomma
    from django.template.defaultfilters import slugify

    for row in out:
        # Falsy lobbyist/firm names are passed through unparsed.
        lobbyist = row['lobbyist_name']
        if lobbyist:
            lobbyist = IndividualNameCleaver(lobbyist).parse()
        row['lobbyist_name_standardized'] = lobbyist
        row['lobbyist_name_slug'] = slugify(row['lobbyist_name_standardized'])

        firm = row['firm_name']
        if firm:
            firm = OrganizationNameCleaver(firm).parse()
        row['firm_name_standardized'] = firm
        row['firm_name_slug'] = slugify(row['firm_name_standardized'])

        # A recipient with an id is cleaned as a politician, any other
        # recipient as an organization.
        if row['recipient_id']:
            recipient_cleaver = PoliticianNameCleaver
        else:
            recipient_cleaver = OrganizationNameCleaver
        row['recipient_name_standardized'] = recipient_cleaver(
            row['recipient_name']).parse()
        row['recipient_name_slug'] = slugify(row['recipient_name_standardized'])

        row['total_amount_standardized'] = intcomma(row['total_amount'])

    return out
def goodEnoughMatch(self,n1,match):
    """Decide whether the raw name ``match`` plausibly refers to ``n1``.

    ``n1`` is an already-parsed name (its ``first``, ``middle``, ``last``
    and ``nick`` attributes are read); ``match`` is a raw string that is
    parsed here with ``IndividualNameCleaver``.

    Rules, applied in order:
      * last names must be equal (anything after ' & ' in the candidate's
        last name is ignored);
      * first names must be equal, unless one side's nickname equals the
        other side's first name;
      * if both have a middle name: when both are longer than one
        character they must be equal, otherwise only their first
        characters must agree.

    Returns True when every rule passes, False otherwise.
    """
    #the court's matching is intentionally weak, so this is important
    n2 = IndividualNameCleaver(match).parse()
    if n2.middle:
        n2.middle = n2.middle.replace('.','')

    #try to catch cases like 'DOWTIN & ALL OTHER O, TANYA'
    n2.last = n2.last.split(' & ')[0]

    if n1.last != n2.last:
        return False

    if n1.first != n2.first:
        # A nickname on EITHER side that equals the other side's first
        # name is enough.  Requiring both directions at once would reject
        # e.g. Robert "Bob" Smith vs. Bob Smith.
        nick_matches = (
            (n1.nick and n1.nick == n2.first)
            or (n2.nick and n2.nick == n1.first)
        )
        if not nick_matches:
            return False

    if n1.middle and n2.middle:
        middle1, middle2 = n1.middle, n2.middle
        if len(middle1) > 1 and len(middle2) > 1:
            # Two spelled-out middle names: compare them in full.
            if middle1 != middle2:
                return False
        elif middle1[0] != middle2[0]:
            # At least one side is a bare initial: compare initials only.
            return False

    #check jr's, sr's? not doing not.
    return True
def loopThroughNames(self,names):
    """Parse each raw name and look up those not already recorded.

    Every entry of ``names`` is stripped, logged and parsed with
    ``IndividualNameCleaver``.  The middle name is normalised (periods
    removed, or set to '' when missing).  Names whose
    ``[last, first, middle]`` is already in ``self.existing`` are skipped.

    A failing ``self.lookupName`` does not abort the loop: the error is
    logged with its traceback, the ".searchtoggleon" element is clicked,
    and processing continues with the next name.
    """
    for name in names:
        name = name.strip()
        logging.info(name)

        n = IndividualNameCleaver(name).parse()
        n.middle = n.middle.replace('.','') if n.middle else ''

        if [n.last,n.first,n.middle] in self.existing:
            logging.info('skipping %s, already present', name)
            continue

        try:
            self.lookupName(n,name)
        except Exception:
            # Exception (not a bare except) so that Ctrl-C / SystemExit
            # can still stop the run; keep the traceback instead of
            # discarding it.
            logging.exception('lookup failed for %s', name)
            # NOTE(review): if self.driver is a Selenium WebDriver, this
            # call raises rather than returning a falsy value when nothing
            # matches -- confirm whether the truthiness check is needed.
            goToIndex = self.driver.find_element_by_css_selector(".searchtoggleon")
            if goToIndex:
                goToIndex.click()
            print('error')
def name(s, **kwargs):
    """Clean a raw individual name and return it as a unicode string.

    Args:
        s: the raw name string.
        **kwargs: if the key ``last_name_space`` is present (its value is
            not inspected), the first space in ``s`` is turned into ', '
            before parsing.

    Returns:
        ``unicode()`` of the name parsed by ``IndividualNameCleaver``.
    """
    if ' ' not in s and ',' in s:
        # fix for wonkiness in name_cleaver
        s = s.replace(',', ', ').strip()

    if 'last_name_space' in kwargs:
        # Replace only the first space.  With count=1 this is a no-op when
        # there is no space at all, whereas indexing with the result of
        # s.find(' ') would overwrite the last character on -1 (or fail on
        # an empty string).
        s = s.replace(' ', ', ', 1)

    return unicode(IndividualNameCleaver(s).parse())
def add_advertiser_signatory(request):
    """Popup view that creates a Person (and optionally a Role).

    GET parameters seed the form's initial data: ``advertiser_id`` is
    copied through (empty becomes None) and ``search`` is parsed with
    ``IndividualNameCleaver(...).parse(safe=True)`` to pre-fill the name
    fields.  When the safe parse hands back a plain string, that string is
    used as the first name.

    On a valid POST a Person is saved; if an ``advertiser_id`` was
    submitted and the Organization exists, a Role linking the two is saved
    as well.  The response is then the script snippet that dismisses the
    popup.  In every other case the request is delegated to
    ``handlePopAdd``.
    """
    defaults = {}
    if 'advertiser_id' in request.GET:
        defaults['advertiser_id'] = request.GET['advertiser_id'] or None

    if 'search' in request.GET:
        input_name = IndividualNameCleaver(request.GET['search']).parse(safe=True)
        if isinstance(input_name, basestring):
            defaults['first_name'] = input_name
        else:
            defaults.update({
                'first_name': input_name.first,
                'middle_name': input_name.middle,
                'last_name': input_name.last,
                'suffix': input_name.suffix,
            })

    if request.method == "POST":
        form = AdvertiserSignatoryForm(request.POST)
        if form.is_valid():
            submitted = form.data

            person = Person()
            person.first_name = submitted['first_name']
            person.middle_name = submitted.get('middle_name', None)
            person.last_name = submitted['last_name']
            person.suffix = submitted.get('suffix', None)
            person.save(request.user)

            # A missing key yields None here, which skips the role
            # creation just like an empty value does.
            adv_id = submitted.get('advertiser_id', None)
            if adv_id:
                try:
                    advertiser = Organization.objects.get(id=adv_id)
                    role = Role(person=person, organization=advertiser)
                    role.title = submitted.get('job_title', '')
                    role.save(request.user)
                except Organization.DoesNotExist:
                    # What else to do in this case?
                    pass

            popup_args = (escape(person._get_pk_val()), escape(person))
            return HttpResponse('<script type="text/javascript">opener.dismissAddAnotherPopup(window, "%s", "%s");</script>' % popup_args)

    return handlePopAdd(request, AdvertiserSignatoryForm, 'advertiser_signatory', initial_data=defaults)
def test_capitalize_scottish_last_names(self):
    # "Mc" and "Mac" prefixes get an inner capital: McDonald, MacDonald.
    cases = (
        ('RONALD MCDONALD', 'Ronald McDonald'),
        ('OLD MACDONALD', 'Old MacDonald'),
    )
    for raw_name, expected in cases:
        cleaned = str(IndividualNameCleaver(raw_name).parse())
        self.assertEqual(expected, cleaned)
def test_mrs_walton(self):
    # A trailing "MRS" is moved to the front and rendered as "Mrs.".
    cleaned = str(IndividualNameCleaver('WALTON, JIM MRS').parse())
    self.assertEqual('Mrs. Jim Walton', cleaned)
def test_doesnt_overzealously_detect_doctors(self):
    # The "Dr" at the start of "Drew" must not be stripped as an honorific.
    cleaned = str(IndividualNameCleaver('Maloney, Drew').parse())
    self.assertEqual('Drew Maloney', cleaned)
def test_unfazed_by_weird_cop_cont_parenthetical_phrases(self):
    # Parenthesised "COP CONT" / "CONT'D" noise must be dropped entirely
    # from the cleaned name.
    cases = (
        ('SCHMITZ (COP CONT ), JACQUELINE A', 'Jacqueline A Schmitz'),
        ('MELLMAN (CONT\'D), HANNAH (CONT\'D)', 'Hannah Mellman'),
        ('PRESTON (C O P CONT\'D ), TOD', 'Tod Preston'),
    )
    for raw_name, expected in cases:
        cleaned = str(IndividualNameCleaver(raw_name).parse())
        self.assertEqual(expected, cleaned)
def test_mr_and_mrs(self):
    # A combined "MR & MRS" honorific is expected to vanish altogether.
    cleaned = str(IndividualNameCleaver('LAY, KENNETH L MR & MRS').parse())
    self.assertEqual('Kenneth L Lay', cleaned)
def test_individual(self):
    # str() of a parsed non-ASCII name is compared against the UTF-8
    # encoding of the expected text.
    expected_bytes = u'Tobias F\u00fcnke'.encode('utf-8')
    parsed = IndividualNameCleaver(u'F\u00fcnke, Tobias').parse()
    self.assertEqual(expected_bytes, str(parsed))
# Create the inital contribution object id = row[DEFAULT_FIELDS['id']['fieldname']] created = False try: # Manual step-through of get_or_create sans the commit record = Contribution.objects.get(id=id) except Contribution.DoesNotExist: record = Contribution() created = True # Iterate through the fields in DEFAULT_FIELDS and set contribution object attributes properly for field in DEFAULT_FIELDS.keys(): setattr(record, field, row[DEFAULT_FIELDS[field]['fieldname']]) # Now parse the contributor_name field into its constituent parts using Sunlight's name-cleaver. # More here: https://github.com/sunlightlabs/name-cleaver parsed_name = IndividualNameCleaver(record.contributor_name).parse() # Add the name parts to the database if created and parsed_name: record.honorific = parsed_name.honorific record.first_name = parsed_name.first record.middle_name = parsed_name.middle record.last_name = parsed_name.last record.suffix = parsed_name.suffix record.nick = parsed_name.nick # Create the records 5,000 at a time and print progress to the terminal bulk_records.append(record) if i % 5000 == 0: create_records(bulk_records) print '%s records created ...' % i
from django.http import Http404 from django.template.defaultfilters import slugify from settings import api, LATEST_CYCLE, DEFAULT_CYCLE import datetime import googleanalytics import re from django.utils.datastructures import SortedDict from name_cleaver import PoliticianNameCleaver, OrganizationNameCleaver, \ IndividualNameCleaver from name_cleaver.names import PoliticianName _standardizers = { 'politician': lambda n: PoliticianNameCleaver(n).parse(), 'individual': lambda n: IndividualNameCleaver(n).parse(), 'industry': lambda n: OrganizationNameCleaver(n).parse(), 'organization': lambda n: OrganizationNameCleaver(n).parse(), } def standardize_name(name, type): try: standardized_name = _standardizers[type](name) if standardized_name.honorific: standardized_name.honorific = "("+standardized_name.honorific+")" return standardized_name except AttributeError: return _standardizers[type](name) def bar_validate(data): ''' take a dict formatted for submission to the barchart generation function, and make sure there's data worth displaying.
def standardize_individual_name_filter(name):
    """Return the cleaned form of ``name`` as a str, minus its honorific.

    The raw name is parsed with ``IndividualNameCleaver``, passed through
    ``remove_honorific`` and converted with ``str()``.
    """
    parsed = IndividualNameCleaver(name).parse()
    without_honorific = remove_honorific(parsed)
    return str(without_honorific)
return [s.lower() for s in get_name_permutations(parts)] def get_name_permutations(name): options = [name.primary_name_parts()] if name.middle: options.append(name.primary_name_parts(include_middle=True)) return [' '.join(x) for x in options] NORMALIZERS_BY_TYPE = { 'individual': lambda x: get_name_permutations(IndividualNameCleaver(x).parse()), 'organization': lambda x: [x], 'politician': normalize_politician, 'industry': None, } def dump_normalizations(aliases_file, out_file): reader = csv.DictReader(aliases_file) writer = csv.writer(out_file) in_count = 0 out_count = 0 for line in reader:
def test_primary_name_parts(self):
    # primary_name_parts() yields [first, last] by default and
    # [first, middle, last] when include_middle=True.
    raw_name = 'Smith, Robert Geoff'

    with_middle = IndividualNameCleaver(raw_name).parse().primary_name_parts(include_middle=True)
    self.assertEqual(['Robert', 'Geoff', 'Smith'], with_middle)

    without_middle = IndividualNameCleaver(raw_name).parse().primary_name_parts()
    self.assertEqual(['Robert', 'Smith'], without_middle)
def test_initialed_first_name(self):
    # A leading initial keeps its period while the rest is title-cased.
    cleaned = str(IndividualNameCleaver('C. RICHARD BONEBRAKE').parse())
    self.assertEqual('C. Richard Bonebrake', cleaned)
def test_md(self):
    # A trailing ", M.D." is kept as the suffix "MD" (periods removed).
    cleaned = str(IndividualNameCleaver('C. RICHARD BONEBRAKE, M.D.').parse())
    self.assertEqual('C. Richard Bonebrake MD', cleaned)
def test_capitalizes_and_punctuates_initials(self):
    # A bare run of initials ("BL") is rendered as "B.L.".
    cleaned = str(IndividualNameCleaver('SCHWARTZ, BL').parse())
    self.assertEqual('B.L. Schwartz', cleaned)
def test_capitalizes_initials_but_not_honorifics(self):
    # "DR" is an honorific, not a pair of initials: it must not come out
    # as "D.R." and is absent from the cleaned name.
    cleaned = str(IndividualNameCleaver('KOZA, DR JOHN').parse())
    self.assertEqual('John Koza', cleaned)
def test_all_kinds_of_crazy(self):
    # Stray digits after the last name and a trailing "MR" are both
    # expected to be discarded.
    raw_name = 'ROTHSCHILD 212, STANFORD Z MR'
    cleaned = str(IndividualNameCleaver(raw_name).parse())
    self.assertEqual('Stanford Z Rothschild', cleaned)
def test_jr_and_the_like_end_up_at_the_end(self):
    # A generational suffix given after the first names must be placed
    # after the last name in the cleaned output.
    raw_name = 'Baird, Frederick A "Tripp" III'
    cleaned = str(IndividualNameCleaver(raw_name).parse())
    self.assertEqual('Frederick A "Tripp" Baird III', cleaned)
def test_unparseable_individual_name(self):
    # Honorifics with no actual name must be reported as unparseable.
    # The lambda keeps construction and parsing both inside the checked
    # call.
    self.assertRaises(
        UnparseableNameException,
        lambda: IndividualNameCleaver("mr & mrs").parse())