def run_allName():
    """Print a predicted age for the top 1000 names in the ``babyname`` table.

    Selects names grouped by ``name`` and ordered by their summed ``num``
    (descending, limited to 1000 rows). For each row, the first
    space-separated token of the name is passed to
    ``babyname.maxLikelyYearForName`` and the line ``<name> <2012 - year>``
    is printed to stdout.
    """
    reference_year = 2012  # predicted ages are computed relative to this year

    # NOTE(review): the database credentials are hard-coded in source; they
    # should be moved to configuration or the environment. Left unchanged here
    # so the function keeps connecting to the same database.
    con = mdb.connect('localhost', 'yongjoo', 'Fgla4Zp0', 'yongjoo')

    sql = ('SELECT name, SUM(num) AS total FROM babyname '
           'GROUP BY name ORDER BY total DESC LIMIT 1000 ')

    # NOTE(review): what ``with con:`` does on exit (commit/rollback vs. close)
    # depends on the installed MySQLdb/mysqlclient version -- confirm.
    with con:
        cur = con.cursor()
        try:
            cur.execute(sql)
            # Iterate over the fetched rows directly instead of trusting
            # cur.rowcount and calling fetchone() that many times.
            for row in cur.fetchall():
                name = row[0]
                firstname = name.split(' ')[0]
                predicted_year = babyname.maxLikelyYearForName(firstname)
                predicted_age = reference_year - predicted_year
                # Single-argument print(): same output as the Python 2
                # statement ``print name, predicted_age``.
                print("%s %s" % (name, predicted_age))
        finally:
            # Release the cursor even if the query or a prediction fails.
            cur.close()
def run_year():
    """Build and print a 4x4 confusion matrix of age groups.

    Reads a CSV file from a fixed path. For each row, the second column is
    treated as a full name and the third column as an age. The first
    space-separated token of the name is passed to
    ``babyname.maxLikelyYearForName``; ``2012 - year`` is the predicted age.
    Both ages are mapped through ``babyname.ageToAgeGroup`` and the cell
    ``[group from CSV age][group from predicted age]`` is incremented.

    Each row's predicted year and predicted group are printed as they are
    processed, followed by the matrix itself (one row per line, every count
    followed by a single space).
    """
    reference_year = 2012  # predicted ages are computed relative to this year
    group_count = 4        # matrix is group_count x group_count

    file_dir = "/home/pyongjoo/workspace/tweetsprocess/data/name-feature/"
    infile = file_dir + "screename-May10-AlmostVerified.csv"
    print(infile)

    # Each row must be its own list; [[0] * n] * n would alias one row n times.
    conf_matrix = [[0] * group_count for _ in range(group_count)]

    # The context manager closes the file handle, which the previous version
    # left open. Binary mode is kept from the original (the Python 2 csv
    # module convention).
    with open(infile, 'rb') as handle:
        for row in csv.reader(handle):
            fullname = row[1]
            age = row[2]
            firstname = fullname.split(' ')[0]

            predicted_year = babyname.maxLikelyYearForName(firstname)
            predicted_age = reference_year - predicted_year

            # NOTE(review): ``age`` is still the raw CSV string here while
            # ``predicted_age`` is a number -- confirm that ageToAgeGroup
            # accepts both.
            age_group = babyname.ageToAgeGroup(age)

            # Best effort: a predicted age that cannot be mapped to a group
            # keeps the -1 sentinel and is left out of the matrix.
            predicted_group = -1
            try:
                predicted_group = babyname.ageToAgeGroup(predicted_age)
            except Exception:
                pass

            print("%s %s" % (predicted_year, predicted_group))

            if predicted_group != -1:
                conf_matrix[age_group][predicted_group] += 1

    print("Confusion Matrix:")
    for counts in conf_matrix:
        # Every cell is followed by one space (including the last), then a
        # newline -- identical to the original cell-by-cell writes.
        sys.stdout.write("".join(str(cell) + " " for cell in counts) + "\n")