def probYearGivenAgeFromFile(age, n_sample=0):
    """Estimate the birth-year distribution for users of a given age.

    Scans the age-embedded tweet dump, collects the first name of every
    user whose reported age equals ``age``, optionally down-samples the
    collected names, and delegates the actual estimation to
    ``probYearGivenNames``.

    Args:
        age: Exact user age to match; compared against
            ``int(tweet["user"]["age"])``.
        n_sample: If non-zero and smaller than the number of matching
            names, a random subsample of this size is used instead of
            the full list.

    Returns:
        Whatever ``probYearGivenNames`` returns for the collected names.
    """
    datadir = "../data"
    filename = "ageEmbededTweets-Jun20.jsonarr"
    names = []
    # Use a context manager so the file handle is closed even if
    # json.loads raises on a malformed line (the original version
    # iterated a bare open() and never closed the handle).
    with open(os.path.join(datadir, filename)) as f:
        for line in f:
            tweet = json.loads(line)
            if age == int(tweet["user"]["age"]):
                names.append(friends.firstNameOf(tweet["user"]["name"]))
    if n_sample != 0 and n_sample < len(names):
        names = random.sample(names, n_sample)
    return probYearGivenNames(names)
def insertUsersIntoMySQL():
    """Load the Twitter user dump (produced by crawler.py) into MySQL.

    Reads one JSON object per line from the crawl output file and
    inserts each user into the ``users`` table. The target table can be
    created with:

        CREATE TABLE `users` (
            `rowid` bigint(20) NOT NULL AUTO_INCREMENT,
            `screen_name` varchar(20) NOT NULL,
            `user_id` bigint(20) NOT NULL,
            `age` int(11) DEFAULT NULL,
            `name` varchar(50) DEFAULT NULL,
            `eng` tinyint(1) DEFAULT NULL,
            PRIMARY KEY (`rowid`),
            UNIQUE KEY `user_id` (`user_id`),
            KEY `eng` (`eng`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8;

    Resulting columns: (rowid, screen_name, user_id, age, name, eng).
    ``age`` is left NULL here; ``eng`` is 1 iff the user's language is
    English.
    """
    filedir = "/home/pyongjoo/workspace/twitter-research/data"
    filename = "users_Jun20_step0-1_2.1.jsonarr"
    # `with` guarantees the file is closed even if a JSON parse error or
    # a DB error is raised mid-loop (the original's trailing f.close()
    # would have been skipped in that case, leaking the handle).
    with open(os.path.join(filedir, filename)) as f:
        con = db.con()
        with con:  # commits on success, rolls back on exception
            cur = con.cursor()
            for line in f:
                j = json.loads(line)  # one user object per line
                screen_name = j["screen_name"]
                user_id = j["id"]
                name = friends.firstNameOf(j["name"])
                eng = 1 if j["lang"] == "en" else 0
                # Parameterized query: values are escaped by the driver,
                # never interpolated into the SQL string.
                cur.execute(
                    """INSERT INTO users VALUES (0, %s, %s, NULL, %s, %s)""",
                    (screen_name, user_id, name, eng),
                )