Example #1
0
def extractFirstName(name, order):
    """Extract a lower-cased first name from a free-form name string.

    The input is normalised first: dots, hyphens and underscores become
    spaces, while digits and question marks are dropped.  The cleaned string
    is handed to ``HumanName``; when the parser returns the whole string as
    the first name, two heuristics are tried (plain two-token split, then a
    CamelCase split).

    Args:
        name: Raw name string.
        order: Passed through unchanged to ``getFirstNameFromHumanName`` and
            ``getFirstNameFromSplitName``.

    Returns:
        The first name in lower case, or ``''`` when the candidate is the
        fragment ``'Mc'`` or a single character.
    """
    # Dots and hyphens act as word separators.
    name = name.replace('.', ' ').replace('-', ' ')

    # Drop digits, then question marks.  If dropping would leave an empty
    # string, substitute underscores instead (they turn into spaces below).
    # Raw strings avoid the invalid-escape warning that "\d" / "\?" trigger.
    for pattern in (r"\d+", r"\?"):
        stripped = re.sub(pattern, "", name)
        name = stripped if stripped else re.sub(pattern, "_", name)

    name = name.replace('_', ' ')

    # First attempt: the HumanName-based helper; if it raises, fall back to
    # a plain whitespace split.  ``Exception`` (not a bare except) so that
    # KeyboardInterrupt / SystemExit still propagate.
    try:
        firstName = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        firstName = getFirstNameFromSplitName(name.split(), order)

    # The parser can hand the whole input back as the first name
    # (e.g. 'Ben Voigt' -> 'Ben Voigt'); in that case use heuristics.
    if firstName.strip() == name.strip():
        tokens = name.split()
        if len(tokens) == 2:
            firstName = getFirstNameFromSplitName(tokens, order)
        else:
            # Try CamelCase: split it and treat '_' as a separator again.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    firstName = HumanName(uncamel).first
                    if len(firstName.split()) == 2:
                        firstName = getFirstNameFromSplitName(
                            firstName.split(), order)
                except Exception:
                    firstName = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # A lone 'Mc' prefix or a single character is not a usable first name.
    if firstName == 'Mc' or len(firstName) == 1:
        return ''
    return firstName.lower()
Example #2
0
def GetNameLink(name):
    """Return the formatted name and its wikilinks for a player name.

    The name is normalised with ``HumanName`` (forced capitalisation), the
    English Wikipedia page at that title is requested through ``GetSoup``,
    and the page text decides the link target:

    * the text contains one of the marker strings below (tennis terms, or
      the "no article" / disambiguation phrases): link to the name itself,
      or, if the page was reached through a redirect, to the redirect
      target piped to the name;
    * otherwise an article exists but is about a different person, so
      ``"<name> (tennis)"`` is used, piped to the name.

    Args:
        name: Player name in any capitalisation.

    Returns:
        ``[name, wikilink, abbr_wikilink]``; ``abbr_wikilink`` displays the
        first name reduced to its initials, e.g. ``J-L Struff``.
    """
    name = HumanName(name)
    name.capitalize(force=True)
    name = str(name)

    # Guard before touching ``.text``: the original compared ``.text`` with
    # None, which could never protect against GetSoup itself returning None.
    # NOTE(review): whether GetSoup can return None is not visible here.
    page = GetSoup("https://en.wikipedia.org/wiki/" + name.replace(" ", "_"),
                   False)
    page_text = None if page is None else page.text

    markers = (
        "International Tennis Federation", "Prize money", "Grand Slam",
        "tennis career", "Wikipedia does not have", "may refer to", "WTA",
        "ITF", "ATP",
    )
    wikitext = name
    pipe = False
    if page_text is not None:
        if any(marker in page_text for marker in markers):
            # Player article exists, or no article exists.
            if "Redirected from" in page_text:
                # NOTE(review): GetSoup is given the page text and True here,
                # presumably to parse already-fetched HTML; confirm against
                # the helper.
                redirected = GetSoup(page_text, True)
                wikitext = str(redirected.title.string).replace(
                    " - Wikipedia", "").strip()
                # If the name is a redirect, pipe the wikilink to avoid
                # anachronistic names, e.g. using "Margaret Court" instead of
                # "Margaret Smith" before she married.
                pipe = True
        else:
            # Article exists for the name but for a different person.
            wikitext = name + " (tennis)"
            pipe = True

    wikilink = "[[" + wikitext + ("|" + name if pipe else "") + "]]"

    # Reduce the name to first-name initials + last name, e.g. "J-L Struff".
    # Empty hyphen segments (e.g. a doubled hyphen) are skipped instead of
    # raising IndexError.
    first, *rest = name.split(" ")
    initials = "-".join(part[0] for part in first.split("-") if part)
    abbr_name = initials + " " + " ".join(rest)
    abbr_wikilink = "[[" + wikitext + "|" + abbr_name + "]]"
    return [name, wikilink, abbr_wikilink]
Example #3
0
def extractFirstName(name, order):
    """Return the lower-cased first name found in a raw name string.

    Steps: turn '.', '-' and '_' into spaces and remove digits and '?';
    parse the result with ``HumanName``; if the parser returns the entire
    string as the first name, retry with a two-token split or a CamelCase
    split.

    Args:
        name: Raw name string.
        order: Forwarded as-is to ``getFirstNameFromHumanName`` and
            ``getFirstNameFromSplitName``.

    Returns:
        The first name in lower case; ``''`` if the candidate is ``'Mc'``
        or only one character long.
    """
    # Treat '.' and '-' as word separators.
    name = ' '.join(name.split('.'))
    name = ' '.join(name.split('-'))

    # Remove digit runs; if that would leave an empty string, keep one '_'
    # per run instead.  Raw strings avoid the invalid "\d" escape warning.
    without_digits = re.sub(r"\d+", "", name)
    if not without_digits:
        without_digits = re.sub(r"\d+", "_", name)
    name = without_digits

    # Same treatment for question marks.
    without_marks = re.sub(r"\?", "", name)
    if not without_marks:
        without_marks = re.sub(r"\?", "_", name)
    name = without_marks

    # Underscores (original or placeholders from above) become spaces.
    name = ' '.join(name.split('_'))

    # Use the name parser; on any ordinary error fall back to a plain split.
    # ``except Exception`` lets KeyboardInterrupt / SystemExit through,
    # unlike the bare ``except`` it replaces.
    try:
        firstName = getFirstNameFromHumanName(HumanName(name), order)
    except Exception:
        firstName = getFirstNameFromSplitName(name.split(), order)

    # If the parser gave the whole string back
    # (firstName('Ben Voigt') == 'Ben Voigt'), use heuristics.
    if firstName.strip() == name.strip():
        words = name.split()
        if len(words) == 2:
            firstName = getFirstNameFromSplitName(words, order)
        else:
            # Try CamelCase.
            uncamel = ' '.join(splitCamelCase(name).split('_'))
            if uncamel != name:
                try:
                    firstName = HumanName(uncamel).first
                    if len(firstName.split()) == 2:
                        firstName = getFirstNameFromSplitName(
                            firstName.split(), order)
                except Exception:
                    firstName = getFirstNameFromSplitName(
                        uncamel.split(), order)

    # Discard results that are not usable first names.
    if firstName == 'Mc':
        firstName = ''
    if len(firstName) == 1:
        firstName = ''
    return firstName.lower()
Example #4
0
def get_last_name(author):
    """Return the last name of the first author in an author string.

    Several authors are expected to be joined by the standalone word
    ``and`` (e.g. ``"Jane Doe and John Smith"``); only the part before the
    first such separator is parsed with ``HumanName``.

    Args:
        author: One author name, or several joined by ``and``.

    Returns:
        The ``last`` attribute of the parsed first author.
    """
    # Local import so the block does not rely on a module-level `import re`.
    import re

    # Split on the *word* "and" surrounded by whitespace.  A plain
    # ``split("and")`` also cuts inside names such as "Alexander" or
    # "Sandra", truncating the first author.
    first_author = re.split(r"\s+and(?:\s+|$)", author, maxsplit=1)[0]
    return HumanName(first_author).last