def main(file_type, filename, db_name):
    """Import tweets from a local archive file into the SQLite database.

    Parses *filename* with ``load(file_type, filename)`` and inserts each
    tweet into the ``tweets`` table of *db_name*, creating the database
    first if it does not exist. Tweets already present (same ``twitter_id``)
    are silently skipped. Progress is reported on stderr.

    Note: archive rows carry no user/reply/geo metadata, so those columns
    are stored as NULL and ``is_retweet`` as False.
    """
    if not os.path.exists(db_name):
        twitter_utils.create_db(db_name)
    conn = sqlite3.connect(db_name)
    try:
        tweets = load(file_type, filename)
        sys.stderr.write("parsed %s tweets. now inserting... " % len(tweets))
        for tweet in tweets:
            try:
                conn.execute(
                    "INSERT INTO tweets (twitter_id, user_id, is_retweet, created_at, text, in_reply_to_status_id, coordinates, geo, place, source) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (
                        tweet["id"],
                        None,   # user_id not available in archive files
                        False,  # archives carry no retweet marker
                        # created_at is a datetime; store as local-epoch seconds
                        int(time.mktime(tweet["created_at"].timetuple())),
                        tweet["text"],
                        None,   # in_reply_to_status_id
                        None,   # coordinates
                        None,   # geo
                        None,   # place
                        None,   # source
                    ),
                )
            except sqlite3.IntegrityError:
                # Duplicate twitter_id -- tweet already in the DB, skip it.
                pass
        conn.commit()
    finally:
        # Close even if load()/execute raises (original leaked the handle).
        conn.close()
    sys.stderr.write("done!\n")
def main(screen_name, db_name):
    """Incrementally sync *screen_name*'s Twitter timeline into SQLite.

    Creates the database at *db_name* if needed, reads the highest stored
    ``twitter_id`` as the ``since_id`` cursor, then pages through
    ``user_timeline`` until an empty/short page is returned. Retweets are
    stored as the original (retweeted) status with ``is_retweet`` set.
    Duplicate tweets (same ``twitter_id``) are skipped. Transient
    ``urllib2.HTTPError`` failures are retried up to 5 times with backoff.
    Progress is reported on stderr.
    """
    if not os.path.exists(db_name):
        twitter_utils.create_db(db_name)
    conn = sqlite3.connect(db_name)
    try:
        c = conn.execute("SELECT max(twitter_id) FROM tweets")
        # max() over an empty table yields a single NULL row, so since_id
        # is None on a fresh database (full fetch).
        since_id = c.fetchall()[0][0]
        page = 1
        fail_count = 0
        while True:
            sys.stderr.write("since_id=%s, page=%s, fail_count=%s...\n"
                             % (since_id, page, fail_count))
            try:
                tweets = user_timeline(screen_name, page, since_id=since_id)
                fail_count = 0
            except urllib2.HTTPError:
                if fail_count < 5:
                    fail_count += 1
                    # Back off before retrying; the original re-queried
                    # immediately in a tight loop, hammering the API.
                    time.sleep(25 * fail_count)
                    continue
                else:
                    sys.stderr.write("fail whale keeps showing up. stopped retrying...")
                    break
            if not tweets:
                break
            saved_tweets = 0
            for tweet in tweets:
                is_retweet = 'retweeted_status' in tweet
                if is_retweet:
                    # Persist the original status; is_retweet records the wrap.
                    tweet = tweet['retweeted_status']
                try:
                    conn.execute(
                        "INSERT INTO tweets (twitter_id, user_id, is_retweet, created_at, text, in_reply_to_status_id, coordinates, geo, place, source) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        (tweet['id'],
                         tweet['user']['id'],
                         is_retweet,
                         # created_at is a datetime; store local-epoch seconds
                         int(time.mktime(tweet['created_at'].timetuple())),
                         tweet['text'],
                         tweet['in_reply_to_status_id'],
                         # Geo-ish payloads are stored as JSON text or NULL.
                         simplejson.dumps(tweet['coordinates']) if tweet.get('coordinates') else None,
                         simplejson.dumps(tweet['geo']) if tweet.get('geo') else None,
                         simplejson.dumps(tweet['place']) if tweet.get('place') else None,
                         tweet.get('source')))
                    saved_tweets += 1
                except sqlite3.IntegrityError:
                    # Duplicate twitter_id -- already stored, skip.
                    pass
            conn.commit()
            sys.stderr.write("saved %s tweets.\n" % saved_tweets)
            if len(tweets) < API_PAGE_SIZE:
                # A short page means we've reached the end of the timeline.
                break
            # Rate-limit pacing between successful pages (fail_count is 0
            # here after a success, so this sleeps 25s).
            time.sleep(25 * (fail_count + 1))
            page += 1
    finally:
        # Close even on error/KeyboardInterrupt (original leaked the handle).
        conn.close()