Example #1
0
def probYearGivenAgeFromFile(age, n_sample=0):
    """Estimate birth-year probabilities for users of a given age.

    Scans the tweet dump for users whose declared age equals *age*,
    collects their first names, and delegates to probYearGivenNames().

    Parameters:
        age: exact age (int) to match against tweet["user"]["age"].
        n_sample: if non-zero and smaller than the number of matches,
            a random subsample of this size is used instead of all names.

    Returns:
        Whatever probYearGivenNames() returns for the collected names.
    """
    datadir = "../data"
    filename = "ageEmbededTweets-Jun20.jsonarr"

    names = []

    # Use a context manager so the file handle is closed even if a line
    # fails to parse; the original left the handle open.
    with open(os.path.join(datadir, filename)) as f:
        for line in f:
            tweet = json.loads(line)
            if age == int(tweet["user"]["age"]):
                names.append(friends.firstNameOf(tweet["user"]["name"]))

    # Optional down-sampling to bound the cost of the estimation step.
    if n_sample != 0 and n_sample < len(names):
        names = random.sample(names, n_sample)

    return probYearGivenNames(names)
Example #2
0
def insertUsersIntoMySQL():
    """
    Read the users data retrieved from Twitter. The retrieval is done by
    crawler.py. The mysql db will be:

        (rowid, screen_name, user_id, age, name, eng)

    The table might be created by using the below sql.

        CREATE TABLE `users` (
          `rowid` bigint(20) NOT NULL AUTO_INCREMENT,
          `screen_name` varchar(20) NOT NULL,
          `user_id` bigint(20) NOT NULL,
          `age` int(11) DEFAULT NULL,
          `name` varchar(50) DEFAULT NULL,
          `eng` tinyint(1) DEFAULT NULL,
          PRIMARY KEY (`rowid`),
          UNIQUE KEY `user_id` (`user_id`),
          KEY `eng` (`eng`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
    """
    filedir = "/home/pyongjoo/workspace/twitter-research/data"
    filename = "users_Jun20_step0-1_2.1.jsonarr"

    con = db.con()

    # Open the input inside a `with` so the handle is closed even when an
    # INSERT raises; the original f.close() was skipped on any exception.
    with open(os.path.join(filedir, filename)) as f, con:
        cur = con.cursor()

        for line in f:
            j = json.loads(line)  # one user object per line
            screen_name = j["screen_name"]
            user_id = j["id"]
            name = friends.firstNameOf(j["name"])
            # Flag English-language accounts; stored as tinyint(1).
            eng = 1 if j["lang"] == "en" else 0

            # Parameterized query — rowid 0 lets AUTO_INCREMENT assign it,
            # age is left NULL to be filled in later.
            cur.execute(
                """INSERT INTO users VALUES
                        (0, %s, %s, NULL, %s, %s)""",
                (screen_name, user_id, name, eng),
            )