def clean_authors(authors):
    """Split a comma-separated author string into normalized author names.

    The input is lower-cased, a handful of known irregularities are patched
    so that commas only separate distinct authors, and every piece is parsed
    with ``HumanName``, capitalized and rendered as
    ``"{last}, {title} {first} {middle}, {suffix}"``.

    :param authors: String listing one or more authors separated by commas.
    :return: List of formatted author strings, in input order.
    :raises ValueError: If a parsed piece has an empty first or last name.
    """
    # `unicode` only exists on Python 2; fall back to `str` so the function
    # also works on Python 3 instead of failing with NameError.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    authors = authors.lower()

    # Applied in order. The first three remove commas that precede suffixes
    # or degrees (they would otherwise be treated as author separators);
    # the last two are data-specific special cases.
    replacements = (
        (", jr.", " jr."),
        (", iii", " iii"),
        (", ph.d", ""),
        ("organizer:", ""),
        ("roel m,", "roel m."),
    )
    for old, new in replacements:
        authors = authors.replace(old, new)

    # Special case: this exact input contains a comma inside the second
    # author's name, so it cannot be split mechanically.
    if authors == 'kozue miyashiro, etsuko harada, t.':
        author_list = ['kozue miyashiro', 'etsuko harada, t.']
    else:
        author_list = authors.split(",")

    cleaned_authors = []
    for raw_author in author_list:
        # Every piece is already lower-case at this point.
        author = HumanName(raw_author)

        if author.first == '' or author.last == '':
            raise ValueError("invalid author name: {}".format(author))

        author.capitalize()
        author.string_format = u"{last}, {title} {first} {middle}, {suffix}"

        cleaned_authors.append(text_type(author))

    return cleaned_authors
Example #2
0
def extractFirstName(name, order):
    """Return a lower-cased first name extracted from a free-form name string.

    The string is normalized (dots, dashes and underscores become spaces;
    digits and question marks are dropped), parsed with ``HumanName``, and
    then refined with a few heuristics when the parser returns the whole
    input unchanged.

    :param name: Raw name string.
    :param order: Forwarded unchanged to ``getFirstNameFromHumanName`` and
        ``getFirstNameFromSplitName``; its meaning is defined by those
        helpers.
    :return: The first name in lower case, or ``''`` when the result is the
        bare prefix ``'Mc'`` or a single character.
    """
    # Dots and dashes act as word separators.
    name = ' '.join(name.split('.'))
    name = ' '.join(name.split('-'))

    # Drop digit runs. If the name consisted only of digits, replace them
    # with underscores instead (turned into spaces below).
    without_digits = re.sub(r"\d+", "", name)
    if not without_digits:
        without_digits = re.sub(r"\d+", "_", name)
    name = without_digits

    # Same treatment for question marks.
    without_marks = re.sub(r"\?", "", name)
    if not without_marks:
        without_marks = re.sub(r"\?", "_", name)
    name = without_marks

    name = ' '.join(name.split('_'))

    # Use the Python name parser; fall back to a plain whitespace split if
    # it fails. Only ordinary errors are caught so that KeyboardInterrupt
    # and SystemExit still propagate.
    try:
        firstName = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        firstName = getFirstNameFromSplitName(name.split(), order)

    # If the parser handed back the whole input, fall back to heuristics.
    if firstName.strip() == name.strip():
        if len(name.split()) == 2:
            # e.g. firstName('Ben Voigt') == 'Ben Voigt'
            firstName = getFirstNameFromSplitName(name.split(), order)
        else:
            # Try to break up CamelCase.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    firstName = HumanName(uncamel).first
                    if len(firstName.split()) == 2:
                        firstName = getFirstNameFromSplitName(firstName.split(), order)
                except Exception:
                    firstName = getFirstNameFromSplitName(uncamel.split(), order)

    # A bare 'Mc' prefix or a single character is not a usable first name.
    if firstName == 'Mc' or len(firstName) == 1:
        firstName = ''
    return firstName.lower()
Example #3
0
    def names_compare(name1, name2):
        """
        Takes string arguments with human names and returns indication of match between names.

        Both names are lower-cased and parsed with ``HumanName``; only the
        parsed ``first`` and ``last`` parts take part in the comparison.

        :param name1: String argument with name
        :param name2: String argument with name
        :return: "exact" when last names match and the first names are equal
            or the shorter (non-empty) first name is contained in the longer
            one, "last" for only last name matches, False for non-matches
        :raises TypeError: If either argument is not a ``str``
        """

        if not isinstance(name1, str) or not isinstance(name2, str):
            raise TypeError(
                "CosineCalc.names_compare must receive both string arguments.")

        name1 = HumanName(name1.lower())
        name2 = HumanName(name2.lower())

        # Without a last-name match nothing else matters.
        if name1.last != name2.last:
            return False

        first1, first2 = name1.first, name2.first
        if first1 == first2:
            return "exact"

        # Nickname check: is the shorter first name a substring of the longer?
        if len(first1) < len(first2):
            shorter, longer = first1, first2
        else:
            shorter, longer = first2, first1

        # An empty string is a substring of everything, so a name with no
        # first name must not count as a nickname match; it is only a
        # last-name match.
        if shorter and shorter in longer:
            return "exact"

        return "last"
Example #4
0
def extractFirstName(name, order):
    """Return a lower-cased first name extracted from a free-form name string.

    The string is normalized (dots, dashes and underscores become spaces;
    digits and question marks are dropped), parsed with ``HumanName``, and
    then refined with a few heuristics when the parser returns the whole
    input unchanged.

    :param name: Raw name string.
    :param order: Forwarded unchanged to ``getFirstNameFromHumanName`` and
        ``getFirstNameFromSplitName``; its meaning is defined by those
        helpers.
    :return: The first name in lower case, or ``''`` when the result is the
        bare prefix ``'Mc'`` or a single character.
    """
    # Dots and dashes act as word separators.
    name = ' '.join(name.split('.'))
    name = ' '.join(name.split('-'))

    # Drop digit runs. If the name consisted only of digits, replace them
    # with underscores instead (turned into spaces below).
    digits_removed = re.sub(r"\d+", "", name)
    if not digits_removed:
        digits_removed = re.sub(r"\d+", "_", name)
    name = digits_removed

    # Same treatment for question marks.
    marks_removed = re.sub(r"\?", "", name)
    if not marks_removed:
        marks_removed = re.sub(r"\?", "_", name)
    name = marks_removed

    name = ' '.join(name.split('_'))

    # Use the Python name parser; fall back to a plain whitespace split if
    # it fails. Only ordinary errors are caught so that KeyboardInterrupt
    # and SystemExit still propagate.
    try:
        firstName = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        firstName = getFirstNameFromSplitName(name.split(), order)

    # If the parser handed back the whole input, fall back to heuristics.
    if firstName.strip() == name.strip():
        if len(name.split()) == 2:
            # e.g. firstName('Ben Voigt') == 'Ben Voigt'
            firstName = getFirstNameFromSplitName(name.split(), order)
        else:
            # Try to break up CamelCase.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    firstName = HumanName(uncamel).first
                    if len(firstName.split()) == 2:
                        firstName = getFirstNameFromSplitName(
                            firstName.split(), order)
                except Exception:
                    firstName = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # A bare 'Mc' prefix or a single character is not a usable first name.
    if firstName == 'Mc' or len(firstName) == 1:
        firstName = ''
    return firstName.lower()
Example #5
0
 def initContact(contactId: str):
     """Register a contact and cache its e-mail address and display name.

     Adds ``contactId`` to ``activeContacts``, fetches the contact record via
     ``dir.getContact`` and stores its ``'email'`` value in
     ``contactsToEmails`` and a capitalized full name in ``contactsToNames``.

     :param contactId: Identifier of the contact to initialize; must not
         already be in ``activeContacts``.
     :raises AssertionError: If ``contactId`` is already active (only when
         assertions are enabled).
     :raises KeyError: If the contact record has no ``'email'`` key
         (assuming the record is a plain dict; confirm against
         ``dir.getContact``).
     """
     assert contactId not in activeContacts
     activeContacts.add(contactId)
     contact = dir.getContact(contactId)
     contactsToEmails[contactId] = contact['email']

     # Assemble the full name from whichever parts are present; missing or
     # empty parts are skipped.
     name_parts = [
         contact.get('title_before_name'),
         contact.get('first_name'),
         contact.get('last_name'),
         contact.get('title_after_name'),
     ]
     name = " ".join(filter(None, name_parts))

     # Only '@' is stripped. Other punctuation is kept because the initials
     # check below relies on the periods.
     name = name.replace('@', '')
     name = HumanName(name.lower().strip())
     name.capitalize()

     # A first name made up solely of initials (e.g. "j.r.") is upper-cased
     # as a whole.
     if re.search(r'^(\w\.)+$', name.first):
         name.first = name.first.upper()
     contactsToNames[contactId] = str(name)