def get_all_tweets():
    """Return every tweet in the db together with its label.

    Returns:
        rows: list of (tweet, label) tuples. For example;
            [('this is a test positive tweet', 'pos'),
             ('this is a test negative tweet', 'neg'),
             ...]

    Raises:
        Whatever the underlying cursor raises when the query fails; the
        driver is torn down before the exception propagates.
    """
    db = DatabaseDriver()
    db.setup()
    try:
        result = db.cur.execute('SELECT tweet, label FROM tweets')
        return result.fetchall()
    finally:
        # Tear down even when the query raises, so a failed read does not
        # leave the driver set up.
        db.teardown()
def sanitize():
    """Filter all the tweets residing in the database.

    Every row's tweet text is passed through ``filter`` and written back
    to the same row. Each filtered text is also printed to stdout. The
    updates are committed together once all rows have been rewritten, and
    the driver is torn down whether or not an error occurred.
    """
    db = DatabaseDriver()
    db.setup()
    try:
        # Name the columns explicitly so the unpacking below does not
        # depend on the table's column order or column count.
        rows = db.cur.execute('SELECT id, tweet FROM tweets').fetchall()
        for tweet_id, tweet in rows:
            # NOTE(review): `filter` must be the project's one-argument
            # tweet cleaner here, not the builtin -- confirm it is defined
            # or imported at module level.
            ftweet = filter(tweet)
            # Single-argument parenthesised form prints identically on
            # Python 2 and Python 3.
            print(ftweet)
            db.cur.execute(
                'UPDATE tweets SET tweet=? WHERE id=?', (ftweet, tweet_id)
            )
        db.conn.commit()
    finally:
        # Release the driver even if filtering or an UPDATE raised.
        db.teardown()
class Miner(object):
    """Crawls tweets with a ``Crawler`` and stores them in the ``tweets`` table.

    The instance owns one ``DatabaseDriver`` (``self.dbd``). ``mine`` tears
    that driver down after writing, so an instance is good for a single
    ``mine`` call.
    """

    def __init__(self):
        self.dbd = DatabaseDriver()
        self.dbd.setup()
        self.dbd.ensure_table()

    def save(self, tweet):
        """Save tweet to the db unless an identical tweet is already stored.

        The insert is not committed here; the caller commits.

        Args:
            tweet: text string
        """
        cur = self.dbd.cur
        cur.execute("SELECT tweet FROM tweets WHERE tweet=?", (tweet,))
        if cur.fetchone() is not None:
            # Only report a duplicate when one was actually found.
            print('duplicate!!!')
            return
        cur.execute("INSERT INTO tweets(tweet) VALUES(?)", (tweet,))

    def mine(self, query, rpp, max_tweets):
        """Prepare test data for training.

        Runs a ``Crawler`` with the given arguments, saves every tweet it
        collected, commits, and tears the database driver down.

        Args:
            query: search query to be used in twitter search
            rpp: rate per page is the param to the twitter search url
            max_tweets: maximum number of tweets to crawl
        """
        crawler = Crawler(query, rpp, max_tweets)
        # The crawl stays outside the try block: if it fails, nothing has
        # been written and the driver is left untouched.
        crawler.start()
        try:
            for tweet in crawler.tweets:
                self.save(tweet)
            self.dbd.conn.commit()
        finally:
            # Tear down even if a save or the commit raised.
            self.dbd.teardown()
def __init__(self):
    """Attach a DatabaseDriver as ``self.dbd``, then call its setup() and ensure_table()."""
    # Bind the attribute first, then drive the same object via a local alias.
    self.dbd = driver = DatabaseDriver()
    driver.setup()
    driver.ensure_table()