Beispiel #1
0
def run_allName():
    """Print a predicted age for each of the top 1000 names in the database.

    Selects the 1000 names with the largest summed ``num`` from the
    ``babyname`` table.  For each one, the first space-separated token of
    the name is passed to ``babyname.maxLikelyYearForName`` (the
    ``babyname`` module, not the table), and the name is printed together
    with ``2012 - <returned year>``.
    """
    # NOTE(review): the connection credentials are hard-coded in source;
    # consider loading them from configuration or the environment instead.
    con = mdb.connect('localhost', 'yongjoo', 'Fgla4Zp0', 'yongjoo')

    with con:
        sql = '''SELECT name, SUM(num) AS total
                FROM babyname
                GROUP BY name
                ORDER BY total DESC
                LIMIT 1000
                '''

        cur = con.cursor()
        try:
            cur.execute(sql)

            # Walk the fetched rows directly rather than counting on
            # cur.rowcount, which a DB-API driver may report as -1 when
            # the row count cannot be determined.
            for row in cur.fetchall():
                name = row[0]
                firstname = name.split(' ')[0]
                predicted_year = babyname.maxLikelyYearForName(firstname)
                predicted_age = 2012 - predicted_year

                # A single pre-formatted argument prints the same text
                # under the Python 2 print statement and the Python 3
                # print function.
                print("%s %s" % (name, predicted_age))
        finally:
            # Release the cursor even if the query or a lookup fails.
            cur.close()
Beispiel #2
0
def run_year():
    """Print a confusion matrix of labelled versus name-predicted age groups.

    Reads a fixed CSV file whose rows hold a screen name, a full name and
    an age (columns 0, 1 and 2).  For every row, the first space-separated
    token of the full name is passed to ``babyname.maxLikelyYearForName``
    and the predicted age is ``2012 - <returned year>``.  Both the age from
    the file and the predicted age are mapped through
    ``babyname.ageToAgeGroup``, and the 4x4 matrix cell
    ``[labelled group][predicted group]`` is incremented.  Rows whose
    predicted age cannot be mapped (``ageToAgeGroup`` raises) are left out
    of the matrix.  The predicted year and group of every row are printed,
    followed by the matrix itself.
    """
    file_dir = "/home/pyongjoo/workspace/tweetsprocess/data/name-feature/"
    infile = file_dir + "screename-May10-AlmostVerified.csv"

    print(infile)

    # conf_matrix[labelled_group][predicted_group] -> number of rows.
    conf_matrix = [[0, 0, 0, 0] for _ in range(4)]

    # The with-block guarantees the file is closed, including on errors.
    # Binary mode is what the Python 2 csv module expects.
    with open(infile, 'rb') as handle:
        for row in csv.reader(handle):
            fullname = row[1]
            age = row[2]

            firstname = (fullname.split(' '))[0]
            predicted_year = babyname.maxLikelyYearForName(firstname)
            predicted_age = 2012 - predicted_year

            # NOTE(review): age is the raw CSV string while predicted_age
            # is an int; confirm ageToAgeGroup handles both consistently.
            age_group = babyname.ageToAgeGroup(age)

            # Best effort: if the predicted age cannot be mapped to a
            # group, mark it with -1 and keep the row out of the matrix.
            try:
                predicted_group = babyname.ageToAgeGroup(predicted_age)
            except Exception:
                predicted_group = -1

            print("%s %s" % (predicted_year, predicted_group))

            if predicted_group != -1:
                conf_matrix[age_group][predicted_group] += 1

    print("Confusion Matrix:")
    for counts in conf_matrix:
        # Each count is followed by one space; the line ends with a newline.
        sys.stdout.write("".join(str(count) + " " for count in counts))
        sys.stdout.write("\n")