def test_formating_removing_pieces_from_name_buckets(self):
    """Check that emptying a name bucket removes it from the formatted string."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")

    # Baseline: every bucket present, nickname wrapped in single quotes.
    hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'"
    assert u(hn) == "Rev John A. Kenneth Doe III 'Kenny'"

    # Same name without the nickname key in the format.
    hn.string_format = "{title} {first} {middle} {last} {suffix}"
    assert u(hn) == "Rev John A. Kenneth Doe III"

    # Blank the buckets one after another; each step builds on the last,
    # so the order of this table matters.
    steps = (
        ("middle", "Rev John Doe III"),
        ("suffix", "Rev John Doe"),
        ("title", "John Doe"),
    )
    for bucket, expected in steps:
        setattr(hn, bucket, "")
        assert u(hn) == expected
def normalize(name):
    """Return a lowercase, letters-and-whitespace-only form of ``name``.

    The input is lowercased, whitespace runs are collapsed to single spaces,
    the text is passed through ``unidecode``, hyphens become spaces and
    anything that looks like an HTML tag is removed. The result is parsed
    with ``HumanName`` and rendered as "first middle last"; finally every
    character other than ``a``-``z`` and whitespace is dropped.
    """
    collapsed = " ".join(name.strip().lower().split())
    ascii_text = unidecode(collapsed).replace("-", " ")
    without_tags = re.sub(r'<[^>]+>', r'', ascii_text)  # remove html

    parsed = HumanName(without_tags)
    parsed.string_format = '{first} {middle} {last}'
    rendered = str(parsed)

    return re.sub(r'[^a-z\s]', r'', rendered)
def clean_authors(authors):
    """Split a comma-separated author string into formatted author names.

    The string is lowercased and a fixed list of textual clean-ups is
    applied before splitting on commas. Each piece is parsed with
    ``HumanName``, capitalized and rendered as
    "last, title first middle, suffix".

    Raises:
        ValueError: if a parsed piece has an empty first or last name.
    """
    text = authors.lower()

    # Applied in order; each pair is (text to find, replacement).
    substitutions = (
        # get rid of commas where there are suffixes, like Jr. or III
        (", jr.", " jr."),
        (", iii", " iii"),
        (", ph.d", ""),
        # special cases
        ("organizer:", ""),
        ("roel m,", "roel m."),
    )
    for old, new in substitutions:
        text = text.replace(old, new)

    # This exact input is split by hand: a plain comma split would cut the
    # last author in two.
    if text == 'kozue miyashiro, etsuko harada, t.':
        pieces = ['kozue miyashiro', 'etsuko harada, t.']
    else:
        pieces = text.split(",")

    results = []
    for piece in pieces:
        parsed = HumanName(piece.lower())
        if parsed.first == '' or parsed.last == '':
            raise ValueError("invalid author name: {}".format(parsed))
        parsed.capitalize()
        parsed.string_format = u"{last}, {title} {first} {middle}, {suffix}"
        results.append(unicode(parsed))
    return results
def get_full_name(person):
    """Return a presentable name for ``person``.

    ``person.display_name`` is returned unchanged when it is non-empty, is
    not made up solely of digits and is not all upper case. Otherwise
    ``person.full_name`` is parsed with ``HumanName``, capitalized and
    rendered as "first last".
    """
    shown = person.display_name
    usable = len(shown) > 0 and not shown.isdigit() and not shown.isupper()
    if usable:
        return person.display_name

    parsed = HumanName(person.full_name)
    parsed.capitalize()
    parsed.string_format = "{first} {last}"
    return str(parsed)
def get_display_name(self):
    """Return the display name, or build one from the first/last name.

    When ``has_display_name()`` is true the stored ``display_name`` is
    returned as is. Otherwise the first and last name (or just the last
    name when ``has_first_name()`` is false) are parsed with ``HumanName``,
    capitalized and rendered as "first last".
    """
    if self.has_display_name():
        return self.display_name

    raw = (
        "%s %s" % (self.first_name, self.last_name)
        if self.has_first_name()
        else self.last_name
    )
    parsed = HumanName(raw)
    parsed.capitalize()
    parsed.string_format = "{first} {last}"
    return str(parsed)
def user_fullname(user):
    """Return a human-readable name for ``user``.

    * If the user has a ``display_name`` attribute, that value is returned,
      unless it is ``None``, empty or all upper case *and* the user also has
      a ``first_name`` attribute. In that case ``first_name`` and
      ``surname`` are parsed with ``HumanName``, capitalized and rendered
      as "first last".
    * Otherwise, if the user has an ``email`` attribute, the part before
      the ``@`` is returned.

    Raises:
        UserPolicyException: if the user has neither attribute.
    """
    if hasattr(user, 'display_name'):
        shown = user.display_name
        unusable = shown is None or len(shown) == 0 or shown.isupper()
        if not (unusable and hasattr(user, 'first_name')):
            return user.display_name

        parsed = HumanName('%s %s' % (user.first_name, user.surname))
        parsed.capitalize()
        parsed.string_format = '{first} {last}'
        return str(parsed)

    if hasattr(user, 'email'):
        return user.email.split('@')[0]  # CanvasUser

    raise UserPolicyException('Invalid user')
def test_formating_removing_keys_from_format_string(self):
    """Check the output as keys are progressively dropped from the format."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")

    # (string_format, expected rendering) pairs, applied in order.
    cases = (
        ("{title} {first} {middle} {last} {suffix} '{nickname}'",
         "Rev John A. Kenneth Doe III 'Kenny'"),
        ("{last}, {title} {first} {middle}, {suffix}",
         "Doe, Rev John A. Kenneth, III"),
        ("{last}, {title} {first} {middle}",
         "Doe, Rev John A. Kenneth"),
        ("{last}, {first} {middle}",
         "Doe, John A. Kenneth"),
        ("{last}, {first}",
         "Doe, John"),
        ("{first} {last}",
         "John Doe"),
    )
    for fmt, expected in cases:
        hn.string_format = fmt
        assert u(hn) == expected
def parse_file(filepath, output_path="dataeng_test/parsed_dataset.csv"):
    """Clean a name/price CSV and write the result to ``output_path``.

    For every input row that has a name:

    * the name is reduced to "first last" via ``HumanName``;
    * leading zeros are removed from the price column;
    * for every row whose name cell is not the literal ``"name"`` (the
      header), ``"true"`` or ``"false"`` is appended depending on whether
      the price is greater than 100;
    * the row is re-split on whitespace so first and last name land in
      separate columns.

    The first kept row is then replaced by the output header
    ``first_name, last_name, price, above_100``.

    Args:
        filepath: Path of the CSV file to read.
        output_path: Path of the CSV file to write. Defaults to the
            location that was previously hard-coded.

    Raises:
        ValueError: if a price cell cannot be converted to ``float``.
    """
    parsed_rows = []

    # Open the path that was passed in (not a file literally named
    # "filepath"); newline="" is what the csv module expects.
    with open(filepath, mode="r", newline="") as csv_file:
        for row in csv.reader(csv_file):
            # Skip blank lines and rows with no name.
            if not row or not row[0]:
                continue

            # Remove anything other than first and last names.
            name = HumanName(row[0])
            name.string_format = "{first} {last}"
            row[0] = str(name)

            # Remove prepended 0s only. strip("0") would also eat trailing
            # zeros and turn "100" into "1". Keep a single "0" when the
            # value consisted solely of zeros so float() still works.
            row[1] = row[1].lstrip("0") or "0"

            # Add true if amount > 100 (header row is left untouched).
            if row[0] != "name":
                row.append("true" if float(row[1]) > 100 else "false")

            # Split the name into first and last name columns.
            # NOTE(review): this splits on every space, so a name that does
            # not render as exactly two words shifts the columns — confirm
            # that the input never contains such names.
            parsed_rows.append(" ".join(row).split())

    # Replace the (assumed) header row with the new column names; if no row
    # survived, still emit the header instead of raising IndexError.
    header = ["first_name", "last_name", "price", "above_100"]
    if parsed_rows:
        parsed_rows[0] = header
    else:
        parsed_rows.append(header)

    with open(output_path, "w", newline="") as out_file:
        csv.writer(out_file).writerows(parsed_rows)
def remove_name_titles(name):
    """Return ``name`` rendered with only its first and last name parts."""
    parsed = HumanName(name)
    parsed.string_format = "{first} {last}"
    return str(parsed)
def test_formating(self):
    """Check two ``string_format`` layouts against the same parsed name."""
    hn = HumanName("Rev John A. Kenneth Doe III")

    # (string_format, expected rendering) pairs, applied in order.
    cases = (
        ("{title} {first} {middle} {last} {suffix}",
         "Rev John A. Kenneth Doe III"),
        ("{last}, {title} {first} {middle}, {suffix}",
         "Doe, Rev John A. Kenneth, III"),
    )
    for fmt, expected in cases:
        hn.string_format = fmt
        self.assertEqual(unicode(hn), expected)
def display_name(first_name, surname, reverse=False):
    """Return a capitalized name built from ``first_name`` and ``surname``.

    The result is "last, first" when ``reverse`` is exactly ``True`` and
    "first last" for any other value.
    """
    parsed = HumanName("%s %s" % (first_name, surname))
    parsed.capitalize()

    # Identity check on purpose: only the literal True selects reversed order.
    if reverse is True:
        parsed.string_format = "{last}, {first}"
    else:
        parsed.string_format = "{first} {last}"

    return unicode(parsed)
def test_formating_of_nicknames_with_double_quotes(self):
    """Double quotes around ``{nickname}`` vanish once the nickname is blank."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")
    hn.string_format = '{title} {first} {middle} {last} {suffix} "{nickname}"'

    with_nickname = u(hn)
    assert with_nickname == 'Rev John A. Kenneth Doe III "Kenny"'

    # With the nickname blanked, no empty quotes may be left behind.
    hn.nickname = ""
    without_nickname = u(hn)
    assert without_nickname == "Rev John A. Kenneth Doe III"
def test_formating_of_nicknames_with_single_quotes(self):
    """Single quotes around ``{nickname}`` vanish once the nickname is blank."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")
    hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'"

    with_nickname = u(hn)
    assert with_nickname == "Rev John A. Kenneth Doe III 'Kenny'"

    # With the nickname blanked, no empty quotes may be left behind.
    hn.nickname = ""
    without_nickname = u(hn)
    assert without_nickname == "Rev John A. Kenneth Doe III"
def test_formating_of_nicknames_with_parenthesis(self):
    """Parentheses around ``{nickname}`` vanish once the nickname is blank."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")
    hn.string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"

    with_nickname = u(hn)
    assert with_nickname == "Rev John A. Kenneth Doe III (Kenny)"

    # With the nickname blanked, no empty parentheses may be left behind.
    hn.nickname = ""
    without_nickname = u(hn)
    assert without_nickname == "Rev John A. Kenneth Doe III"
def test_quote_nickname_formating(self):
    """Check a single-quoted nickname in two different format layouts."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")

    # (string_format, expected rendering) pairs, applied in order.
    cases = (
        ("{title} {first} {middle} {last} {suffix} '{nickname}'",
         "Rev John A. Kenneth Doe III 'Kenny'"),
        ("{last}, {title} {first} {middle}, {suffix} '{nickname}'",
         "Doe, Rev John A. Kenneth, III 'Kenny'"),
    )
    for fmt, expected in cases:
        hn.string_format = fmt
        assert u(hn) == expected
#!/usr/bin/python
# -*- coding: utf-8 -*-
from nameparser import HumanName

# Small demo: parse one name, then print its last name, its dictionary
# form and its string form before and after switching to "first last".
name = HumanName("Dr. Juan Q. Xavier de la Vega III (Doc Vega)")

print(name.last)
print(name.as_dict())
print(name)

name.string_format = "{first} {last}"
print(name)
def test_formating_of_nicknames_in_middle(self):
    """A parenthesized nickname placed mid-format vanishes once it is blank."""
    hn = HumanName("Rev John A. Kenneth Doe III (Kenny)")
    hn.string_format = "{title} {first} ({nickname}) {middle} {last} {suffix}"

    with_nickname = u(hn)
    assert with_nickname == "Rev John (Kenny) A. Kenneth Doe III"

    # With the nickname blanked, neither empty parentheses nor a doubled
    # space may remain between first and middle name.
    hn.nickname = ""
    without_nickname = u(hn)
    assert without_nickname == "Rev John A. Kenneth Doe III"