Esempio n. 1
0
def get_all_tweets():
    """Return the text and label of every row in the ``tweets`` table.

    A ``DatabaseDriver`` is created and set up for the duration of the
    call and is always torn down again, even when the query fails.

    Returns:
        rows: list of tuples. For example;
                [('this is a test positive tweet', 'pos'),
                ('this is a test negative tweet', 'neg'), ...]
    """
    db = DatabaseDriver()
    db.setup()
    try:
        result = db.cur.execute('SELECT tweet, label FROM tweets')
        return result.fetchall()
    finally:
        # Release the driver even if execute()/fetchall() raises.
        db.teardown()
Esempio n. 2
0
def sanitize():
    """Filter every tweet stored in the database and write it back.

    For each row of the ``tweets`` table the ``tweet`` text is passed
    through ``filter``, the filtered text is printed, and the row is
    updated in place. All updates are committed together once the loop
    has finished; the driver is torn down whether or not that succeeds.
    """
    db = DatabaseDriver()
    db.setup()
    try:
        # Select only the columns that are used so the unpacking below
        # does not depend on the table having exactly three columns.
        rows = db.cur.execute('SELECT id, tweet FROM tweets').fetchall()

        for tweet_id, tweet in rows:
            # NOTE(review): `filter` is called with a single argument, so
            # it is presumably a project-level helper that shadows the
            # builtin -- confirm against the module's imports.
            ftweet = filter(tweet)
            print(ftweet)
            db.cur.execute(
                'UPDATE tweets SET tweet=? WHERE id=?', (ftweet, tweet_id))
        db.conn.commit()
    finally:
        # Release the driver even if filtering or an UPDATE raises.
        db.teardown()
Esempio n. 3
0
class Miner(object):
    """Crawls tweets and stores the new ones in the ``tweets`` table.

    The instance owns a ``DatabaseDriver`` (``self.dbd``) that is set up
    on construction and torn down at the end of ``mine``.
    """
    def __init__(self):
        self.dbd = DatabaseDriver()
        self.dbd.setup()
        self.dbd.ensure_table()

    def save(self, tweet):
        """saves tweet to the db unless an identical tweet is already stored

        Prints ``duplicate!!!`` when the tweet is skipped. The insert is
        not committed here; ``mine`` commits once after all saves.

        Args:
            tweet: text string
        """
        cursor = self.dbd.cur
        cursor.execute("SELECT tweet FROM tweets WHERE tweet=?", (tweet,))
        if cursor.fetchone():
            # Only report a duplicate when the row really exists already.
            print('duplicate!!!')
        else:
            cursor.execute("INSERT INTO tweets(tweet) VALUES(?)", (tweet,))

    def mine(self, query, rpp, max_tweets):
        """prepares test data for training

        Runs a ``Crawler`` with the given arguments, saves every tweet it
        collected, commits, and tears the database driver down. The
        driver is torn down even when crawling or saving fails, so the
        instance should not be used again after this call.

        Args:
            query: search query to be used in twitter search
            rpp: rate per page is the param to the twitter search url
            max_tweets: maximum number of tweets to crawl
        """
        try:
            crawler = Crawler(query, rpp, max_tweets)
            crawler.start()

            for tweet in crawler.tweets:
                self.save(tweet)

            self.dbd.conn.commit()
        finally:
            # Release the driver even if the crawl or a save raises.
            self.dbd.teardown()
Esempio n. 4
0
 def __init__(self):
     """Create the database driver, set it up and ensure its table exists."""
     # Bind the attribute first, then drive the setup through a local alias.
     self.dbd = driver = DatabaseDriver()
     driver.setup()
     driver.ensure_table()