Ejemplo n.º 1
0
def build_names():
    """Fill the GivenNameDict and Nicknames tables from the GivenNames entries.

    Each GivenNames entry is unpacked as ``(name, hints, nicknames)``; any of
    the three may be a single value or a tuple of values. Names and nicknames
    are passed through ``names.normalize`` before being used as keys.

    After the call:
      * ``GivenNameDict[normalized_name]`` is a list of the hints seen for
        that name.
      * ``Nicknames[normalized_nick]`` is a flat list of the normalized names
        that nickname may stand for.

    Returns:
        The number of (name, hint) pairs whose hint passed ``hint.is_hint``.
    """
    cnt = 0
    for name, hints, nicknames in GivenNames:
        if not isinstance(name, tuple):
            name = (name,)
        name = tuple(names.normalize(n) for n in name)
        if not isinstance(hints, tuple):
            hints = (hints,)
        for n in name:
            for h in hints:
                if not hint.is_hint(h):
                    print((name, h, ' is not a hint'))
                else:
                    cnt += 1
                # The hint is recorded whether or not it passed the check
                # above; only the returned count distinguishes the two cases.
                GivenNameDict.setdefault(n, []).append(h)
        if not isinstance(nicknames, tuple):
            nicknames = (nicknames,)
        for nick in nicknames:
            nick = names.normalize(nick)
            # extend() keeps the value a flat list of names. The previous
            # append(list(name)) nested a list inside the list whenever a
            # nickname occurred in more than one entry, which made the
            # elements unusable as dictionary keys later on.
            Nicknames.setdefault(nick, []).extend(name)
    return cnt
Ejemplo n.º 2
0
def build_names():
    """Build the GivenNameDict and Nicknames lookup tables from GivenNames.

    Every GivenNames entry is a ``(name, hints, nicknames)`` triple. Each
    field may be a single value or a tuple; single values are wrapped into a
    one-element tuple. Names and nicknames are normalized with
    ``names.normalize`` before they are used as dictionary keys.

    Side effects:
      * ``GivenNameDict`` maps each normalized name to a list of its hints.
      * ``Nicknames`` maps each normalized nickname to a flat list of the
        normalized names it can refer to.

    Returns:
        How many (name, hint) pairs carried a hint accepted by
        ``hint.is_hint``.
    """
    cnt = 0
    for name, hints, nicknames in GivenNames:
        if not isinstance(name, tuple):
            name = (name, )
        name = tuple(names.normalize(n) for n in name)
        if not isinstance(hints, tuple):
            hints = (hints, )
        for n in name:
            for h in hints:
                if hint.is_hint(h):
                    cnt += 1
                else:
                    print((name, h, ' is not a hint'))
                # Rejected hints are still stored; they are only excluded
                # from the returned count.
                GivenNameDict.setdefault(n, []).append(h)
        if not isinstance(nicknames, tuple):
            nicknames = (nicknames, )
        for nick in nicknames:
            nick = names.normalize(nick)
            # Use extend() so a nickname shared by several entries still maps
            # to a flat list of names. append(list(name)) used to push a
            # nested list onto the existing flat list, and a list element
            # cannot be looked up in a dict (unhashable).
            Nicknames.setdefault(nick, []).extend(name)
    return cnt
Ejemplo n.º 3
0
def lookup(conn, surnames):
    """Print one report line for each part of each surname.

    Hyphenated surnames are split on '-' and every part is reported on its
    own line. Each part is normalized with ``names.normalize`` and then passed
    to ``ethnicity`` and ``origin`` together with ``conn``.

    Args:
        conn: handle forwarded unchanged to ``ethnicity`` and ``origin``.
        surnames: iterable of surname strings.
    """
    row_format = '%-15s %5.2f%% %-6s %s'
    # Flatten "A-B" style surnames into their individual parts.
    parts = (piece for surname in surnames for piece in surname.split('-'))
    for piece in parts:
        key = names.normalize(piece)
        eth = ethnicity(conn, key)
        org = origin(conn, key)
        # eth is indexed as (label, percentage); org is joined with spaces.
        fields = (piece, eth[1], eth[0], ' '.join(org))
        print((row_format % fields))
Ejemplo n.º 4
0
def lookup(conn, surnames):
    """Report ethnicity and origin for every surname given.

    A surname containing hyphens is treated as several surnames: it is split
    on '-' and each component gets its own output line.

    Args:
        conn: passed through to ``ethnicity`` and ``origin``.
        surnames: iterable of surname strings.
    """
    for full_name in surnames:
        components = full_name.split('-')
        for component in components:
            normalized = names.normalize(component)
            eth_result = ethnicity(conn, normalized)
            origins = origin(conn, normalized)
            # Element 1 of the ethnicity result is printed as the percentage,
            # element 0 as the label.
            line = '%-15s %5.2f%% %-6s %s' % (
                component, eth_result[1], eth_result[0], ' '.join(origins))
            print(line)
Ejemplo n.º 5
0
def yob_generator():
    """Yield one record per line of every ``yob<year>.txt`` file from 1900 on.

    Files are found with the glob ``./<dir>/yob*.txt``, where ``dir`` is a
    name resolved outside this function. Each line is expected to hold three
    comma-separated fields: name, gender, total.

    Yields:
        ``(year, normalized_name, gender, total)`` tuples. ``year`` and
        ``total`` are yielded as strings, exactly as read; the name is passed
        through ``names.normalize``.

    Raises:
        ValueError: if a line does not split into exactly three fields.
    """
    for filename in glob.glob('./' + dir + '/yob*.txt'):
        # The glob also matches names such as "yob_notes.txt"; skip anything
        # that does not carry a numeric year instead of failing on None.
        match = re.search(r'yob(\d+)\.txt', filename)
        if match is None:
            continue
        year = match.group(1)
        if int(year) < 1900:
            continue
        # The context manager closes the file even if the consumer abandons
        # the generator part-way through a file.
        with open(filename, 'r') as handle:
            for line in handle:
                name, gender, total = line.strip().split(",")
                yield (year, names.normalize(name), gender, total)
Ejemplo n.º 6
0
def classify(name):
    """Return the hints recorded for a given name and for its nickname targets.

    The name is normalized, expanded with whatever ``Nicknames`` lists for it,
    and the hint lists stored in ``GivenNameDict`` for each candidate are
    concatenated in that order.

    Args:
        name: the given name to classify.

    Returns:
        A list of hints; empty when none of the candidates is known.
        Duplicates are kept.
    """
    key = names.normalize(name)
    # The name itself comes first, followed by the names it may be short for.
    candidates = [key] + Nicknames.get(key, [])
    per_candidate = (GivenNameDict.get(candidate, []) for candidate in candidates)
    # FIXME: the result is deliberately not de-duplicated; going through
    # set() would lose the ordering, and order matters here.
    return list(itertools.chain.from_iterable(per_candidate))
Ejemplo n.º 7
0
def classify(name):
    """Collect every hint known for ``name`` or for the names it abbreviates.

    Args:
        name: the given name to look up; it is normalized first.

    Returns:
        A list with the hints of the normalized name followed by the hints of
        each entry ``Nicknames`` holds for it. Repeated hints are not removed.
    """
    normalized = names.normalize(name)
    # Names to look up: the name itself, then its nickname expansions.
    lookups = [normalized]
    if normalized in Nicknames:
        lookups = lookups + Nicknames[normalized]
    collected = []
    for candidate in lookups:
        if candidate in GivenNameDict:
            collected.extend(GivenNameDict[candidate])
    # FIXME: no de-duplication here. list(set(...)) was tried and dropped
    # because the order of the hints matters.
    return collected
Ejemplo n.º 8
0
def lookup(conn, givennames):
    """Print a one-line gender, birth-span and origin summary per given name.

    For each name the normalized form is passed to ``gender``,
    ``birthspan_pct`` (with a percentage of 70), ``birthspan`` (with a span of
    20) and ``givenname_origin.classify``. When all of those come back empty
    or zero, only the name and the top gender share are printed.

    Args:
        conn: handle forwarded to the gender and birth-span helpers.
        givennames: iterable of given-name strings.
    """
    pct, span = 70, 20
    for name in givennames:
        norm = names.normalize(name)
        g = gender(conn, norm)
        plo, phi = birthspan_pct(conn, norm, pct)
        (slo, shi), spct = birthspan(conn, norm, span)
        hints = givenname_origin.classify(norm)

        # The name counts as found as soon as any source knows something.
        found = g['F'] != 0. or g['M'] != 0. or spct != 0 or hints != []
        top_share = max(g.values()) * 100.
        if not found:
            print('%-15s %3.0f%%' % (name, top_share))
            continue

        # Ties are reported as 'F'.
        sex = 'F' if g['F'] >= g['M'] else 'M'
        print('%-15s %3.0f%%%s %3.0f%%@%.0fyr=%d-%d %.0f%%=%d-%d %s' %
              (name, top_share, sex, spct, span, slo, shi, pct, plo, phi,
               hints))
Ejemplo n.º 9
0
def lookup(conn, givennames):
    """Report gender share, birth-year ranges and origin hints for given names.

    Each name is normalized and looked up through ``gender``,
    ``birthspan_pct`` (percentage 70), ``birthspan`` (span 20) and
    ``givenname_origin.classify``. A name that none of them knows produces a
    short line with just the name and the top gender share.

    Args:
        conn: handle passed on to the gender and birth-span helpers.
        givennames: iterable of given-name strings.
    """
    short_format = '%-15s %3.0f%%'
    full_format = '%-15s %3.0f%%%s %3.0f%%@%.0fyr=%d-%d %.0f%%=%d-%d %s'
    for name in givennames:
        norm = names.normalize(name)
        g = gender(conn, norm)
        pct, span = 70, 20
        plo, phi = birthspan_pct(conn, norm, pct)
        (slo, shi), spct = birthspan(conn, norm, span)
        hints = givenname_origin.classify(norm)
        unknown = (g['F'] == 0. and g['M'] == 0.
                   and spct == 0 and hints == [])
        if unknown:
            # name not found
            values = (name, max(g.values()) * 100.)
            print(short_format % values)
        else:
            share = max(g.values()) * 100.
            # 'F' wins when both shares are equal.
            label = 'M' if g['F'] < g['M'] else 'F'
            values = (name, share, label,
                      spct, span, slo, shi, pct, plo, phi, hints)
            print(full_format % values)