Example #1
    def startMining(self):
        try:
            tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
            tso.set_keywords(
                self.keywords
            )  # let's define all words we would like to have a look for
            # tso.set_language('de') # we want to see German tweets only
            tso.set_include_entities(
                False)  # and don't give us all those entity information

            # it's about time to create a TwitterSearch object with our secret tokens
            ts = TwitterSearch(consumer_key=self.consumer_key,
                               consumer_secret=self.consumer_secret,
                               access_token=self.access_token,
                               access_token_secret=self.access_token_secret)

            sleep_for = 60  # sleep for 60 seconds
            last_amount_of_queries = 0  # used to detect when new queries are done

            # this is where the fun actually starts :)
            for tweet in ts.search_tweets_iterable(tso):
                print('@%s tweeted: %s' %
                      (tweet['user']['screen_name'], tweet['text']))

                current_amount_of_queries = ts.get_statistics()[0]
                if not last_amount_of_queries == current_amount_of_queries:
                    last_amount_of_queries = current_amount_of_queries
                    time.sleep(sleep_for)

        except TwitterSearchException as e:  # take care of all those ugly errors if there are some
            print(e)
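The query-counting delay above can also be written with the callback hook that search_tweets_iterable() accepts. A minimal sketch, assuming the four credential variables are defined elsewhere and using a placeholder keyword:

import time
from TwitterSearch import TwitterSearch, TwitterSearchOrder, TwitterSearchException


def delay_every_fifth_query(current_ts_instance):  # accepts ONE argument: an instance of TwitterSearch
    queries, tweets_seen = current_ts_instance.get_statistics()
    if queries > 0 and (queries % 5) == 0:  # trigger a delay every 5th query
        time.sleep(60)  # sleep for 60 seconds


try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['example'])  # placeholder keywords

    # consumer_key, consumer_secret, access_token and access_token_secret are assumed to exist
    ts = TwitterSearch(consumer_key=consumer_key,
                       consumer_secret=consumer_secret,
                       access_token=access_token,
                       access_token_secret=access_token_secret)

    for tweet in ts.search_tweets_iterable(tso, callback=delay_every_fifth_query):
        print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))

except TwitterSearchException as e:
    print(e)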
def main():
    """
    Retrieve news tweets using the Twitter Search API with a filter for news.
    Important note: we downloaded TwitterSearch from https://github.com/ckoepp/TwitterSearch and added the
    filter to the search URL!
    This filter retrieves tweets from the requested date (or from now if left empty) that contain links to news
    web sites.
    """

    args = docopt(
        """Retrieve news tweet using Twitter Search API with filter for news.
        Important note: we downloaded TwitterSearch from https://github.com/ckoepp/TwitterSearch and added the
        filter to the search URL!
        This filter retrieves tweets from the requested date (or from now if left empty) that contain links to news
        web sites.

        This script will save the tweets in a file named by the date they were created at.

        Usage: get_news_tweets_stream.py <consumer_key> <consumer_secret> <access_token> <access_token_secret> [<until>]

        Arguments:
            consumer_key  Consumer key for the Twitter API
            consumer_secret  Consumer secret for the Twitter API
            access_token  Access key token for the Twitter API
            access_token_secret  Access token secret for the Twitter API
            until  (Optional): date in the format YYYY/MM/dd. Retrieve tweets only until this date.
            If this argument is not specified, it will retrieve current tweets.
            The Search API only supports up to one week ago.
    """)

    consumer_key = args['<consumer_key>']
    consumer_secret = args['<consumer_secret>']
    access_token = args['<access_token>']
    access_token_secret = args['<access_token_secret>']

    tso = TwitterSearchOrder()
    tso.set_keywords(['.'])
    tso.set_language('en')

    # Set until date
    if args['<until>']:
        year, month, day = map(int, args['<until>'].split('/'))
        until_date = datetime.date(year, month, day)
        # 'until' is exclusive, so the retrieved tweets are from the day before
        file_date = until_date - datetime.timedelta(days=1)
        out_tweet_file = 'news_stream/tweets/%d_%02d_%02d' % (
            file_date.year, file_date.month, file_date.day)
        tso.set_until(until_date)
    else:
        out_tweet_file = 'news_stream/tweets/' + time.strftime('%Y_%m_%d')

    sleep_for = 10
    last_amount_of_queries = 0

    ts = TwitterSearch(consumer_key=consumer_key,
                       consumer_secret=consumer_secret,
                       access_token=access_token,
                       access_token_secret=access_token_secret)

    tweets = set()

    with codecs.open(out_tweet_file, 'w', 'utf-8') as f_out:

        # Stop this manually
        while True:
            try:

                # Get the next batch of tweets
                for tweet in ts.search_tweets_iterable(tso):
                    # Drop characters the default encoding can't represent, then clean
                    text = clean_tweet(tweet['text'].encode(
                        sys.getdefaultencoding(), 'ignore').decode(
                            sys.getdefaultencoding()).replace('\n', ' '))

                    if text not in tweets:
                        f_out.write('\t'.join(
                            (tweet['created_at'], str(tweet['id']),
                             tweet['user']['screen_name'], text)) + '\n')
                        tweets.add(text)

                    current_amount_of_queries = ts.get_statistics()[0]

                    # Handle API rate limit
                    if not last_amount_of_queries == current_amount_of_queries:
                        last_amount_of_queries = current_amount_of_queries
                        time.sleep(sleep_for)

            except TwitterSearchException as e:
                print(e)
                time.sleep(sleep_for)
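clean_tweet() is not part of this snippet; a hypothetical stand-in that strips links, mentions and extra whitespace could look like this:

import re


def clean_tweet(text):
    # Hypothetical helper, not the original clean_tweet(): drop URLs and
    # @mentions, then collapse runs of whitespace.
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    return re.sub(r'\s+', ' ', text).strip()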
from TwitterSearch import *
from settings import *
import time


try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['brexit', 'May'])
    
    ts = TwitterSearch(
                       consumer_key = twitter_consumer_key,
                       consumer_secret = twitter_consumer_secret,
                       access_token = twitter_access_token,
                       access_token_secret = twitter_access_token_secret
                       )
    sleep_for = 5 # sleep for 5 seconds
    last_amount_of_queries = 0 # used to detect when new queries are done
    
    for tweet in ts.search_tweets_iterable(tso):
        print(tweet)
#        print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
        current_amount_of_queries = ts.get_statistics()[0]
        if not last_amount_of_queries == current_amount_of_queries:
            last_amount_of_queries = current_amount_of_queries
            time.sleep(sleep_for)



except TwitterSearchException as e:
    print(e)
try:
    tso = TwitterSearchOrder()
    tso.set_keywords(['example'])  # placeholder keywords

    # placeholder credentials
    ts = TwitterSearch(consumer_key='',
                       consumer_secret='',
                       access_token='',
                       access_token_secret='qxvgonMEav4yHBKtdC8gXtOlI4S0VqlCveFpK7hmDu2hF')

    #for tweet in ts.search_tweets_iterable(tso):
    #	print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))
    #print(len(ts.search_tweets_iterable(tso)))

    sleep_for = 10  # sleep for 10 seconds
    last_amount_of_queries = 0  # used to detect when new queries are done

    analyzer = SentimentIntensityAnalyzer()

    for tweet in ts.search_tweets_iterable(tso):
        vs = analyzer.polarity_scores(tweet['text'])

        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )

        tweet_text = filterTweet(tweet['text'])
        #import pdb; pdb.set_trace()
        print("{:-<65} {}".format(tweet_text, str(vs)))

        #import pdb; pdb.set_trace()
        #queries, tweets_seen = current_ts_instance.get_statistics()

        current_amount_of_queries = ts.get_statistics()[0]
        if not last_amount_of_queries == current_amount_of_queries:
            last_amount_of_queries = current_amount_of_queries
            time.sleep(sleep_for)

except TwitterSearchException as e:  # take care of all those ugly errors if there are some
    print(e)
Example #5
def twitSearch(tweetLastSeen):
    #print("Debug: In function twitSearch()")
    tweetSearchCount = 0
    try:
        tso = TwitterSearchOrder()
        #tso.set_keywords(['disaster','banking'], or_operator = True)
        tso.set_keywords(['disaster','poverty','banking','homeless'], or_operator = True)
        #tso.add_keyword('poverty')
        #tso.add_keyword('disaster')
        #tso.add_keyword('banking')
        tso.set_language('en')
        tso.set_include_entities(False)
        tso.set_result_type('recent')

        if tweetLastSeen > 0:
            print("Debug: I have a previous value for lastseen_id, setting since_id() to: %s and asking for 100 results" % tweetLastSeen)
            tso.set_since_id(tweetLastSeen)
            tso.set_count(100)
        else:
            print("Debug: No value for lastseen_id, asking for one result")
            tso.set_count(1)

        print("Debug: The tso search string looks like this")
        print(tso.create_search_url())

        ts = TwitterSearch(
            consumer_key = '',
            consumer_secret = '',
            access_token = '',
            access_token_secret = '')

##        def my_callback_function(current_ts_instance): # accepts ONE argument: an instance of TwitterSearch
##            #print("In callback function")
##            queries, tweets_seen = current_ts_instance.get_statistics()
##            #query = current_ts_instance.get_statistics()
##            print("%s queries & %s tweets seen" %(queries, tweets_seen))
##            print("%s query" %(query))
##            #if queries > 0 and (queries % 5) == 0: # trigger delay every 5th query
##                #print("Thats 5 queries. Sleeping for 60 secs")
##                #time.sleep(60) # sleep for 60 seconds

        #queries, tweets_seen = ts.get_statistics()
        #print("Debug: %s queries & %s tweets seen" %(queries, tweets_seen))

        #print("Debug: About to iterate over search results from TwitterSearch instance")
        #for tweet in ts.search_tweets_iterable(tso, callback=my_callback_function):
                
        tweets_seen = 0        
        currentTweetID = 0
        lastTweetID = 0

        for tweet in ts.search_tweets_iterable(tso):    
            queries, tweets_seen_by_stats = ts.get_statistics()
            print("Debug: stats: %s queries & %s tweets seen" %(queries, tweets_seen_by_stats))
            rateLimitRemaining = ts.get_metadata()['x-rate-limit-remaining']
            rateLimitReset = ts.get_metadata()['X-Rate-Limit-Reset']
            print("Debug: Rate limit resets at %s and has %s queries remaining" %(datetime.datetime.fromtimestamp(int(rateLimitReset)), rateLimitRemaining))
            currentTweetID = tweet['id']
            print("Debug: Current tweetID %s" % currentTweetID)
            if currentTweetID > lastTweetID:
                print("Debug: Seen a more recent tweetID, updating lastTweetID")
                lastTweetID = currentTweetID
                tweets_seen = tweets_seen_by_stats
                break
            print( 'Debug: In tweet iter @%s tweet id: %s' % ( tweet['user']['screen_name'], tweet['id'] ) )
            tweets_seen = tweets_seen + 1
            print("Debug: tweets_seens: %s" % tweets_seen)
            
        print('Debug: about to return tweet ID @%s' % lastTweetID )
        global twitterSearchCount
        twitterSearchCount = twitterSearchCount + 1
        print("Debug: This is twitter search number: %s" % twitterSearchCount)
        
        return lastTweetID, tweets_seen

    except TwitterSearchException as e:
        print(e)
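twitSearch() returns the newest tweet ID it saw, which is meant to be fed back in so set_since_id() only requests newer tweets. A minimal polling sketch, assuming twitSearch() and the module-level twitterSearchCount counter are defined as above:

import time

twitterSearchCount = 0  # module-level counter incremented inside twitSearch()

lastseen_id = 0
while True:
    result = twitSearch(lastseen_id)
    if result:  # twitSearch() returns None when a TwitterSearchException was caught
        lastseen_id, seen = result
        print("Debug: saw %s tweets, newest ID %s" % (seen, lastseen_id))
    time.sleep(60)  # poll once a minute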
Example #6
    def avoid_rate_limit(self, ts):  # accepts ONE argument: an instance of TwitterSearch

        queries, tweets_seen = ts.get_statistics()
        if queries > 0 and (queries % 5) == 0:  # trigger delay every 5th query
            time.sleep(30)  # sleep for 30 seconds
last_num_queries = 0
retrieved_tweets = set()
number_of_repeat_tweets = 0

try:
    tweetpkl = open('lol.pkl','wb')
    tweets = []
    for tweet in ts.search_tweets_iterable(tso):
        tweets.append(tweet)
        if tweet['id'] in retrieved_tweets:
            print("tweet has already been retrieved")
            number_of_repeat_tweets += 1
        else:
            retrieved_tweets.add(tweet['id'])

        cur_num_queries,tweets_recd = ts.get_statistics()
        if not last_num_queries == cur_num_queries:
            print("Queries done: %i. Tweets received: %i" % ts.get_statistics())
            last_num_queries = cur_num_queries
            print("Sleeping for",sleep_for,"seconds")
            time.sleep(sleep_for)

    pickle.dump(tweets,tweetpkl)
    print("Wrote tweets to file",tweetpkl.name)
    print("number_of_repeat_tweets: ",number_of_repeat_tweets)
    tweetpkl.close()

except TwitterSearchException as e:
    if e.code < 1000:
        print("HTTP status based exception: %i - %s" % (e.code, e.message))
    else:
        print("Other exception: %i - %s" % (e.code, e.message))
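Reading the dumped tweets back is just the reverse pickle call; a short sketch using the same 'lol.pkl' filename:

import pickle

with open('lol.pkl', 'rb') as f:
    saved_tweets = pickle.load(f)  # list of tweet dicts written above
print("Loaded %i tweets" % len(saved_tweets))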
Example #8
class JCrawler(object):
    'Twitter crawler for Japanese tweets'

    tso = TwitterSearchOrder()
    ts = None
    con = None
    cursor = None
    DAILY_THRESHOLD = 50
    IDLE_SLEEP_TIME = 60
    total_api_queries = 0
    queries_processed = 0
    total_tweets_grabbed = 0
    max_tweets = -1
    global_min_id = -1
    global_max_id = -1
    depth = 10
    seed_keywords_array = None
    logfile = None
    exact_tweet_re = re.compile('^RT @')
    queries_to_update = 0

    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_token_secret = ''

    def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret, seed_keywords_array=None, depth=1000,
                 global_min_id=-1, global_max_id=-1, max_tweets=-1):
        # self.load_settings()
        self.depth = depth
        self.seed_keywords_array = seed_keywords_array
        self.global_max_id = global_max_id
        self.global_min_id = global_min_id
        self.max_tweets = max_tweets
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.access_token = access_token
        self.access_token_secret = access_token_secret

        # Open the logfile
        self.logfile = codecs.open("log/tweetlog", 'a', encoding='utf-8')

        if self.max_tweets == -1:
            self.max_tweets = float("inf")

    # def load_settings(self, filepath=""):
        # if(filepath == ""):
        #    filepath = "settings.cfg"

        # config = ConfigParser.RawConfigParser()
        # config.read(filepath)

    def log(self, message):
        # print(message)
        self.logfile.write(message)

    def get_old_queries_from_DB(self):
        self.checkConnection()
        db_query = u"""SELECT * FROM `queries` WHERE
                         `next_update` <= %s AND total_updates > 0"""
        dict_cursor = self.con.cursor(pymysql.cursors.DictCursor)
        dict_cursor.execute(db_query,
                            [int(time.time())])

        self.queries_to_update += dict_cursor.rowcount
        # Get the results
        rows = dict_cursor.fetchall()
        # query_array = []
        # for row in rows:
        #     query_array.append(row)

        return rows

    def update_queries(self):
        query_array = self.get_old_queries_from_DB()

        for query in query_array:
            if(self.total_tweets_grabbed >= self.max_tweets):
                break
            if "min_id" in query:
                min_id = query["min_id"]
            else:
                min_id = None

            if "max_id" in query:
                max_id = query["max_id"]
            else:
                max_id = None

            num_tweets_fetched = scrape_search_query(query, self.depth, min_id, max_id)
            calculate_next_update(query, num_tweets_fetched)

    def get_new_queries_from_DB(self):
        self.checkConnection()
        db_query = u"""SELECT * FROM `queries`
                         WHERE `next_update` <= %s AND total_updates = 0"""
        dict_cursor = self.con.cursor(pymysql.cursors.DictCursor)
        dict_cursor.execute(db_query,
                            [int(time.time())])
        self.queries_to_update += dict_cursor.rowcount
        # Get the results
        rows = dict_cursor.fetchall()

        return rows

    def process_new_queries(self, query_string_array=None):
        if(query_string_array is None):
            query_array = self.get_new_queries_from_DB()
        else:
            query_array = []
            for string in query_string_array:
                query_array.append({'query': string})

        for query in query_array:
            if(self.total_tweets_grabbed >= self.max_tweets):
                break
            num_tweets_fetched = self.scrape_search_query(query, self.depth)
            self.calculate_next_update(query, num_tweets_fetched)

    def connect_DB(self, host, dbuser, dbpass, dbname):
        try:
            conn = pymysql.connect(host,
                                   dbuser,
                                   dbpass,
                                   dbname,
                                   charset='utf8mb4',
                                   use_unicode=True,
                                   init_command="set names utf8mb4")

            testcursor = conn.cursor()
            testcursor.execute("SELECT VERSION()")
            results = testcursor.fetchone()
            # Check if anything at all is returned
            if results:
                self.log('Connected to DB, version: %s' % results)
            else:
                self.log('Connected to DB failed')

            # Disable binary logging to save space
            testcursor.execute("SET sql_log_bin = 0")
            results = testcursor.fetchone()
            testcursor.execute("SET NET_WRITE_TIMEOUT = 2147480")
            results = testcursor.fetchone()
            testcursor.execute("SET WAIT_TIMEOUT = 2147480")
            results = testcursor.fetchone()

            self.con = conn

            return conn

        except pymysql.Error as e:
            self.log("\nError %d: %s" % (e.args[0], e.args[1]))

    def start(self):
        try:
            # self.log('Connected to DB: %s' % self.con.info())
            # while True:
            while self.total_tweets_grabbed < self.max_tweets:
                self.ReportStatus(1)
                self.log("\nUpdating old queries")
                self.update_queries()
                self.log("\nProcessing new queries")
                if(self.seed_keywords_array is not None):
                    self.process_new_queries(self.seed_keywords_array)
                    # Only process seed keywords once
                    self.seed_keywords_array = None
                else:
                    self.process_new_queries()
                    # Keep script from wasting DB resources continually checking for queries to update
                    if(self.queries_to_update == 0):
                        self.ReportStatus(0)
                        time.sleep(self.IDLE_SLEEP_TIME)
            # while AnalysisIsRunning():
                # self.ReportStatus(0)
                # time.sleep(self.IDLE_SLEEP_TIME)

                if (not self.con.open):
                    self.con = self.connect_DB('localhost',
                                               'root',
                                               'preyfromslyfluxlinus',
                                               'jcrawler')

                #self.queries_to_update = 0
        except TwitterSearchException as e:
            if(e.code == 420):
                self.log("\nBlaze it for a hot sec: we're being rate limited. Waiting 15 minutes...")
                self.ReportStatus(2)
                time.sleep(900)
                self.log("\nAttempting to restart scrape process...")
                return self.start()
            elif(e.code == 403 or e.code == 503):
                self.log("\nTwitter is either experiencing high load or we are being rate limited. Waiting 1 hour...")
                self.ReportStatus(2)
                time.sleep(3600)
                self.log("\nAttempting to restart scrape process...")
                return self.start()
            else:
                # self.log(e)
                logging.exception("TwitterSearch had an error:")
                print(e)
        except pymysql.Error as e:
            self.log("\nError %d: %s" % (e.args[0], e.args[1]))
        else:
            self.log("\nQuitting")
            if self.con:
                self.con.close()
            if self.logfile:
                self.logfile.close()

    def restart(self):
        if(self.con.open):
            self.con.close()
        self.cursor = None
        self.total_tweets_grabbed = 0
        self.logfile = codecs.open("log/tweetlog", 'a', encoding='utf-8')
        self.start()

    def checkConnection(self):
        if(self.con is None or not self.con.open):
            self.con = self.connect_DB('localhost',
                                       'root',
                                       'preyfromslyfluxlinus',
                                       'jcrawler')
            self.cursor = self.con.cursor()

    def ReportStatus(self, status, msg=""):
        'Posts info about queries processed, currently processing, etc.'
        self.checkConnection()
        app = "jcrawler"
        if(status == 1):  # Currently running
            variable = "processing"
            value = "1"
        elif(status == 0):  # Done running
            variable = "processing"
            value = "0"
        elif(status == 2):
            # Sleeping
            variable = "processing"
            value = "2"

        controlQuery = u"""INSERT INTO `control`(`app`, `variable`, `value`) VALUES(%s,%s,%s) 
        ON DUPLICATE KEY UPDATE value=VALUES(value)"""
        progressQuery = u"""INSERT INTO `progress`(`app`, `processed`) VALUES(%s, %s)
        """
        reportcursor = self.con.cursor()
        reportcursor.execute(controlQuery, (app, variable, value))
        reportcursor.execute(progressQuery, (app, self.total_tweets_grabbed))
        self.con.commit()
        reportcursor.close()

    def AnalysisIsRunning(self):
        self.checkConnection()
        isRunning = False
        statusQuery = u"""
        SELECT `value` FROM `control` WHERE `app`='lexicalyzer' AND `variable`='processing'
        """
        statuscursor = self.con.cursor()
        statuscursor.execute(statusQuery)
        value = statuscursor.fetchone()
        if(value[0] > 0):
            isRunning = True
        else:
            isRunning = False
        statuscursor.close()
        return isRunning

    def add_queries_to_DB(self, query_array):
        self.checkConnection()
        for query_hash in query_array:
            # exist_stmt = u"""
            # SELECT * FROM `queries`
            # WHERE `sha256_hash`
            # =%s
            # """
            query = query_array[query_hash]
            prepstatement = u"""INSERT INTO `queries`(`max_id`,
            `min_id`,`query`, `last_update`, `sha256_hash`,
            `next_update`, `penalty_level`,
            `total_tweets`) VALUES (
                    %s,%s,%s,%s,%s,%s,%s,
                    %s) ON DUPLICATE KEY UPDATE max_id=VALUES(max_id),
                    min_id=VALUES(min_id), last_update=VALUES(last_update)
                    , next_update=VALUES(next_update), penalty_level=
                    VALUES(penalty_level), total_tweets=VALUES(total_tweets)"""
            # self.log("\nQuery: %s" % query)
            self.cursor.execute(prepstatement,
                                (query["max_id"],
                                 query["min_id"],
                                 query["query"],
                                 query["last_update"],
                                 query["sha256_hash"],
                                 query["next_update"],
                                 query["penalty_level"],
                                 query["total_tweets"]))

    def add_query_to_DB(self, query):
        prepstatement = u"""INSERT INTO `queries`(`max_id`,
        `min_id`,`query`, `last_update`, `sha256_hash`,
        `next_update`, `penalty_level`,
        `total_tweets`) VALUES (
                %s,%s,%s,%s,%s,%s,%s,
                %s) ON DUPLICATE KEY UPDATE max_id=VALUES(max_id),
                min_id=VALUES(min_id), last_update=VALUES(last_update)
                , next_update=VALUES(next_update), penalty_level=
                VALUES(penalty_level), total_tweets=VALUES(total_tweets)"""

        self.cursor.execute(prepstatement,
                            (query["max_id"],
                             query["min_id"],
                             query["query"],
                             query["last_update"],
                             query["sha256_hash"],
                             query["next_update"],
                             query["penalty_level"],
                             query["total_tweets"]))

    def is_exact_retweet(self, tweet):
        return self.exact_tweet_re.match(tweet["text"]) or tweet["retweeted_status"]["text"] == tweet["text"]

    def gen_query_hash(self, query_text):
        return hashlib.sha256(query_text.encode("utf8")).hexdigest()

    def add_to_queries_list(self, query_text, tweet, queries_array):
        query_hash = self.gen_query_hash(query_text)
        if query_hash not in queries_array or queries_array[query_hash] is None:
            self.log("\nAdding new query to list")
            queries_array[query_hash] = {"query": query_text,
                                         "sha256_hash": query_hash,
                                         "max_id": tweet["id"],
                                         "min_id": tweet["id"],
                                         "next_update": int(time.time()),
                                         "penalty_level": 0,
                                         "total_tweets": 1,
                                         "last_update": int(time.time())}
        else:
            if(tweet["id"] >= queries_array[query_hash]["max_id"]):
                queries_array[query_hash]["max_id"] = tweet["id"]
            elif(tweet["id"] <= queries_array[query_hash]["min_id"]):
                queries_array[query_hash]["min_id"] = tweet["id"]
            queries_array[query_hash]["total_tweets"] += 1
            queries_array[query_hash]["last_update"] = int(time.time())

    def calculate_next_update(self, query, num_tweets_fetched):
        if num_tweets_fetched < self.DAILY_THRESHOLD:
            if("penalty_level" in query and query["penalty_level"] is not None):
                query["penalty_level"] += 1
            else:
                query["penalty_level"] = 1
            # Wait penalty_level days until next update
            query["next_update"] = int(time.time()) + (query["penalty_level"] * 24 * 60 * 60)
            self.log("\nPenalizing query by %s days. Next update: %s" % (query["penalty_level"], query["next_update"]))
        else:
            query["penalty_level"] = 0
            # Wait one day
            query["next_update"] = int(time.time()) + (24 * 60 * 60)

    def scrape_search_query(self, search_query, depth, min_id=-1, max_id=-1):
        try:
            self.log("\nScraping...")
            # self.con = connect_DB('localhost',
            #                       'root',
            #                       'preyfromslyfluxlinus',
            #                       'jcrawler')
            self.cursor = self.con.cursor()
            self.tso.set_keywords([search_query["query"]])
            self.tso.set_language('ja')  # search for japanese tweets only
            self.tso.set_locale('ja')

            if(self.global_min_id != -1):
                self.tso.set_since_id(self.global_min_id)
            elif(min_id != -1):
                self.tso.set_since_id(min_id)

            if(self.global_max_id != -1):
                self.tso.set_max_id(self.global_max_id)
            elif(max_id != -1):
                self.tso.set_max_id(max_id)

            sleep_time = 1
            last_num_of_queries = 0
            this_query_total_tweets = 0

            new_queries = {}

            # it's about time to create a TwitterSearch object with our secret
            # tokens
            if self.ts is None:
                self.ts = TwitterSearch(
                    consumer_key='lHw0Fte6wfJnzxMrA9nRxqJJN',
                    consumer_secret='UQmsX0hC9wvuzhaMLZ4OpB'
                    'FqfI4vYdMrHSNt0FYEmFGcsYU0iK',
                    access_token='4920693635-jk0qpriUrztwA2'
                    'a7dOwp4EhQI86qHt4xLbq7uPU',
                    access_token_secret='3mTFWM8leIXQzaiqnH'
                    'hwbspN6BzB5O8qMYKDnrgqHCKBz'
                )

            start = int(time.time())

            for tweet in self.ts.search_tweets_iterable(self.tso):
                self.ReportStatus(1)
                if (('coordinates' in tweet and
                     tweet["coordinates"] is not None and
                     'coordinates' in tweet["coordinates"] and
                     tweet["coordinates"]["coordinates"] and
                     tweet["coordinates"]["coordinates"][0] is not None)):
                    has_coordinates = 1
                    longitude = tweet["coordinates"]["coordinates"][0]
                    latitude = tweet["coordinates"]["coordinates"][1]
                else:
                    has_coordinates = 0
                    longitude = 181
                    latitude = 91

                if (('retweeted_status' in tweet and
                     tweet["retweeted_status"] is not None and
                     'id' in tweet["retweeted_status"] and
                     tweet["retweeted_status"]["id"] is not None)):
                    is_retweet = 1
                    retweet_id_int = tweet["retweeted_status"]["id"]
                    retweet_id_str = tweet["retweeted_status"]["id_str"]
                    if(self.is_exact_retweet(tweet)):
                        is_exact_retweet = 1
                        # Don't bother to save exact retweets anymore
                        continue
                    else:
                        is_exact_retweet = 0
                else:
                    is_retweet = 0
                    retweet_id_int = -1
                    retweet_id_str = ""
                    is_exact_retweet = 0

                this_query_total_tweets += 1
                self.total_tweets_grabbed += 1

                # Write to DB

                # Prepare and execute raw_tweet query
                prepstatement = u"""INSERT INTO `raw_tweets`(`id`, `text`
                , `created_at`, `lang`, `retweet_count`, `source`
                , `user_id`, `has_coordinates`, `longitude`
                , `latitude`, `id_int`, `user_id_int`, `is_retweet`
                , `retweet_id_int`, `retweet_id`, `is_exact_retweet`) VALUES (
                %s,%s,%s,%s,%s,%s,%s,%s,
                %s,%s,%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE id=id"""

                self.cursor.execute(prepstatement,
                                    (tweet["id_str"],
                                     tweet["text"],
                                     tweet["created_at"],
                                     tweet["lang"],
                                     tweet["retweet_count"],
                                     tweet["source"],
                                     tweet["user"]["id_str"],
                                     has_coordinates,
                                     longitude,
                                     latitude,
                                     tweet["id"],
                                     tweet["user"]["id"],
                                     is_retweet,
                                     retweet_id_int,
                                     retweet_id_str,
                                     is_exact_retweet))

                rawjsonstmt = u"""INSERT INTO `raw_tweets_json`(`id`, `id_int`
                , `raw_json`) VALUES (
                %s,%s,%s) ON DUPLICATE KEY UPDATE id=id"""

                self.cursor.execute(rawjsonstmt,
                                    (tweet["id_str"],
                                     tweet["id"],
                                     json.dumps(tweet)))

                # Also store the user data
                user = tweet["user"]
                userstatement = u"""INSERT INTO `twitter_users`(`user_id`, `created_at`, `description`
                , `followers_count`, `friends_count`, `geo_enabled`, `lang`
                , `location`
                , `name`, `protected`, `screen_name`, `statuses_count`
                , `time_zone`
                , `url`, `utc_offset`
                , `verified`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s
                ,%s,%s) ON DUPLICATE KEY UPDATE `description`=
                VALUES(`description`)
                , `friends_count`=VALUES(`friends_count`)
                , `geo_enabled`=VALUES(`geo_enabled`)
                , `lang`=VALUES(`lang`)
                , `location`=VALUES(`location`)
                , `name`=VALUES(`name`)
                , `protected`=VALUES(`protected`)
                , `screen_name`=VALUES(`screen_name`)
                , `statuses_count`=VALUES(`statuses_count`)
                , `time_zone`=VALUES(`time_zone`)
                , `url`=VALUES(`url`)
                , `utc_offset`=VALUES(`utc_offset`)
                """

                self.cursor.execute(userstatement,
                                    (user["id_str"],
                                     user["created_at"],
                                        user["description"],
                                        user["followers_count"],
                                        user["friends_count"],
                                        user["geo_enabled"],
                                        user["lang"],
                                        user["location"],
                                        user["name"],
                                        user["protected"],
                                        user["screen_name"],
                                        user["statuses_count"],
                                        user["time_zone"],
                                        user["url"],
                                        user["utc_offset"],
                                        user["verified"]
                                     ))

                hashtags = tweet["entities"]["hashtags"]

                for hashtag in hashtags:
                    hashtagstatement = u""" INSERT INTO `hashtags`(`tweet_id`
                    , `hashtag`, `start`
                    , `end`) VALUES (%s,%s,%s,%s)
                     ON DUPLICATE KEY UPDATE tweet_id=tweet_id
                    """
                    self.cursor.execute(hashtagstatement, (
                        tweet["id_str"],
                        hashtag["text"],
                        hashtag["indices"][0],
                        hashtag["indices"][1]
                    ))
                    self.add_to_queries_list(hashtag["text"], tweet, new_queries)

                user_mentions = tweet["entities"]["user_mentions"]
                for mention in user_mentions:
                    mentionstatement = u""" INSERT INTO `mentions`(`tweet_id`
                    , `user_id`
                    , `start`
                    , `end`) VALUES (%s,%s,%s,%s)
                     ON DUPLICATE KEY UPDATE tweet_id=tweet_id
                     """
                    self.cursor.execute(mentionstatement, (
                        tweet["id_str"],
                        mention["id_str"],
                        mention["indices"][0],
                        mention["indices"][1]
                    ))

                embedded_urls = tweet["entities"]["urls"]
                for url in embedded_urls:
                    url_statement = u"""INSERT INTO `embedded_urls`(
                    `tweet_id`
                    , `url`
                    , `start`
                    , `end`)
                     VALUES (%s,%s,%s,%s)
                     ON DUPLICATE KEY UPDATE tweet_id=tweet_id
                    """
                    self.cursor.execute(url_statement, (
                        tweet["id_str"],
                        url["url"],
                        url["indices"][0],
                        url["indices"][1]
                    ))

                # TODO: Investigate crash due to lack of the x-rate-limit-remaining key
                if 'x-rate-limit-remaining' in self.ts.get_metadata():
                    self.log("\nCurrent rate-limiting status: %s" %
                             self.ts.get_metadata()['x-rate-limit-remaining'])

                # self.log('\n@%s tweeted: %s'
                # % (tweet['user']['screen_name'], tweet['text']))

                self.add_queries_to_DB(new_queries)

                # Execute queries for real
                self.con.commit()

                # if current_num_of_queries is different
                # from last_num_of_queries,
                # then a new page was loaded by TwitterSearch
                current_num_of_queries = self.ts.get_statistics()[0]
                if not last_num_of_queries == current_num_of_queries:
                    # Calculate current query rate
                    now = int(time.time())
                    if('x-rate-limit-remaining' in self.ts.get_metadata()):
                        reset = int(self.ts.get_metadata()['x-rate-limit-reset'])
                    else:
                        reset = 0

                    if((now - start) == 0):
                        rate = this_query_total_tweets
                    else:
                        rate = this_query_total_tweets / (now - start)

                    self.log("\nCurrent API query rate: %s queries / s" % rate)
                    self.log("\nCUrrent tweets processed: %s" % self.total_tweets_grabbed)

                    # Stop with 3 queries left before hitting the limit
                    # just to be safe
                    if ('x-rate-limit-remaining' in self.ts.get_metadata() and
                            int(self.ts.get_metadata()['x-rate-limit-remaining']) <= 3 and
                            now < reset):
                        longsleeptime = reset - now + 60
                        self.log(
                            '\nSleeping %s seconds \n(x-rate-limit-remaining= %s,'
                            ' \nx-rate-limit-reset=%s,'
                            ' \ntime now=%s)'
                            % (
                                longsleeptime,
                                self.ts.get_metadata()[
                                    'x-rate-limit-remaining'],
                                self.ts.get_metadata()['x-rate-limit-reset'],
                                now
                            )
                        )
                        self.ReportStatus(2)
                        time.sleep(longsleeptime)
                        if(this_query_total_tweets >= depth):
                            break
                    elif ('x-rate-limit-remaining' in self.ts.get_metadata() and
                          int(self.ts.get_metadata()['x-rate-limit-remaining']) <= 3 and
                          now >= reset):
                        # Wait a minute just in case there is a discrepancy
                        # between the rate limit reset time we've been given
                        # and the actual one
                        self.log(
                            '\nSleeping 60 seconds (x-rate-limit-remaining= %s,'
                            ' x-rate-limit-reset=%s,'
                            ' time now=%s)'
                            % (
                                self.ts.get_metadata()[
                                    'x-rate-limit-remaining'],
                                self.ts.get_metadata()['x-rate-limit-reset'],
                                now
                            )
                        )
                        self.ReportStatus(2)
                        time.sleep(60)
                        if(this_query_total_tweets >= depth):
                            break
                    elif('x-rate-limit-remaining' not in self.ts.get_metadata()):
                        self.log("\nx-rate-limit-remaining missing! Will sleep.")
                        self.ReportStatus(2)
                        time.sleep(900)
                    else:
                        last_num_of_queries = self.ts.get_statistics()[0]
                        # Wait between queries
                        self.log(
                            '\nSleeping \n(current_num_of_queries= %s,'
                            ' \nlast_num_of_queries=%s'
                            ', \nx-rate-limit-remaining= %s,'
                            ' \nx-rate-limit-reset=%s)'
                            % (
                                current_num_of_queries,
                                last_num_of_queries,
                                self.ts.get_metadata()[
                                    'x-rate-limit-remaining'],
                                self.ts.get_metadata()['x-rate-limit-reset']
                            )
                        )
                        self.ReportStatus(2)
                        time.sleep(sleep_time)
                        if(this_query_total_tweets >= depth):
                            break

                # Update query counter
                last_num_of_queries = self.ts.get_statistics()[0]

            self.log("\nThis query total tweets processed: %s" % this_query_total_tweets)
            return this_query_total_tweets
        except TwitterSearchException as e:
            if(e.code == 420):
                self.log("\nBlaze it for a hot sec: we're being rate limited. Waiting 15 minutes...")
                self.ReportStatus(2)
                time.sleep(900)
                self.log("\nAttempting to restart scrape process...")
                return self.scrape_search_query(search_query, depth, min_id, max_id)
            elif(e.code == 403 or e.code == 503):
                self.log("\nTwitter is either experiencing high load or we are being rate limited. Waiting 1 hour...")
                self.ReportStatus(2)
                time.sleep(3600)
                self.log("\nAttempting to restart scrape process...")
                return self.scrape_search_query(search_query, depth, min_id, max_id)
            else:
                # self.log(e)
                logging.exception("TwitterSearch had an error:")
                print(e)
        except pymysql.Error as e:
            self.log("\nError %d: %s" % (e.args[0], e.args[1]))
            if(e.args[0] == 2006):
                self.log("\nDetected dead MySQL server, waiting 30 seconds for server to restart...")
                self.ReportStatus(2)
                time.sleep(30)
                self.log("\nAttempting to restart scrape process...")
                return self.scrape_search_query(search_query, depth, min_id, max_id)
        else:
            self.log("\nQuitting")
            if self.con:
                self.con.close()
            if self.logfile:
                self.logfile.close()
            return this_query_total_tweets
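A minimal sketch of driving the crawler; the credentials, seed keyword and tweet cap below are placeholders, not values from the original example:

# Placeholder credentials and seed keyword; swap in real values before running.
crawler = JCrawler(consumer_key='...',
                   consumer_secret='...',
                   access_token='...',
                   access_token_secret='...',
                   seed_keywords_array=['地震'],  # hypothetical Japanese seed keyword
                   max_tweets=10000)
crawler.start()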