예제 #1
0
db2 = Connection(host = HOST_NAME,
                 database = "jb_pure",
                 user = "******",
                 password = "******" 
                )

tweets = db.query("SELECT * FROM tweets")
count = 0
for t in tweets:
    c = t["tweet"].encode('utf-8')
    # language detection
    name, code, reliable, bytes_found, details = cld.detect(c)
    # compile a regex for urls. We don't want tweets with urls
    r = re.compile(r"(http://[^ ]+)")
    urlmatch = r.search(c)
    # we use a set to save tweets, and check against that to prevent duplicates
    saved = set()
    if (code == "en" or code == "un") and not urlmatch and c not in saved:
        # we allow 'unknown' languages into our database, as these are mostly short singlish sentences
        db2.execute("INSERT INTO tweets (user, tweet, location) VALUES (%s, %s, %s)", t["user"], c, t["location"])
        saved.add(c)
    else:
        print "Not English: " + c + " lang:" + name
        count = count + 1

db.close()
db2.close()
print "Not English: " 
print count

예제 #2
0
# Consume the location-filtered stream and store every tweet in MySQL,
# creating a row in `users` the first time a screen name is seen.
with tweetstream.FilterStream(
                SECOND_USERNAME,
                SECOND_PASSWORD,
                locations=locations) as stream:
    for tweet in stream:
        # Skip stream messages that are not regular statuses (anything
        # lacking a user or text) instead of dying on a KeyError.
        if "user" not in tweet or "text" not in tweet:
            continue

        username = tweet["user"]["screen_name"]
        text = tweet["text"]
        # "place" can be missing or null; indexing it unconditionally raised
        # TypeError and stopped the whole stream. Store no location then.
        # NOTE(review): assumes tweets.location accepts NULL - confirm schema.
        place = tweet.get("place")
        loc = place["full_name"] if place else None

        db = Connection(host = HOST_NAME,
                        database = SG_MYSQL_DB_NAME,
                        user = SG_MYSQL_USER_NAME,
                        password = SG_MYSQL_PASSWORD
                )
        try:
            user = db.get("SELECT id FROM users WHERE username=%s", username)
            if not user:
                # First tweet from this user: create the row, then re-read it
                # to obtain the generated id.
                db.execute("INSERT into users (username) VALUES (%s)", username)
                user = db.get("SELECT id FROM users WHERE username=%s", username)
            db.execute("INSERT into tweets (user, tweet, location) VALUES (%s, %s, %s)", user["id"], text, loc)
        finally:
            # Always release the per-tweet connection, even when a query fails.
            db.close()

        #print "==================="
        #print text
        #print username
        #print "(%s)" % loc