# Example #1
    def test_rate_limit(self, api, wait=True, buffer=.1):
        """
        Tests whether the rate limit of the last request has been reached.

        :param api: The `tweepy` api instance.
        :param wait: A flag indicating whether to wait for the rate limit reset
                    if the rate limit has been reached.
        :param buffer: A buffer time in seconds that is added on to the waiting
                    time as an extra safety margin.
        :return: True if it is ok to proceed with the next request. False otherwise.
        """
        # Number of requests still allowed in the current rate-limit window.
        remaining = int(api.last_response.headers['x-rate-limit-remaining'])

        # We have not reached the rate limit -- ok to proceed.
        if remaining > 0:
            return True

        limit = int(api.last_response.headers['x-rate-limit-limit'])
        reset = int(api.last_response.headers['x-rate-limit-reset'])
        # Parse the epoch timestamp into a local datetime.
        reset = datetime.fromtimestamp(reset)
        # Let the user know we have reached the rate limit.
        # BUG FIX: `reset` is a datetime after the conversion above, so it
        # must be formatted with %s -- %d would raise inside logging.
        log.info("0 of %d requests remaining until %s.", limit, reset)

        if not wait:
            # We have reached the rate limit. The user needs to handle the
            # rate limit manually.
            return False

        # Determine the delay and sleep. BUG FIX: clamp to zero -- the reset
        # time may already be in the past, and time.sleep() raises ValueError
        # for negative values.
        delay = max((reset - datetime.now()).total_seconds() + buffer, 0)
        log.info("Sleeping for %d", delay)
        sleep(delay)
        # We have waited for the rate limit reset. OK to proceed.
        return True
 def execute(self):
     """Run the Twitter streaming crawler until one filter pass completes.

     Opens a `tweepy` stream over the configured geo bounding box and
     retries forever: any failure is logged, the stream is disconnected,
     and the crawler sleeps ten minutes before reconnecting.
     """
     listener = TwitterStream(self.tw_store)
     stream = tweepy.Stream(self.auth, listener)
     while True:
         try:
             log.info("Start stream tweets data")
             stream.filter(locations=AUS_GEO_CODE)
             log.info("End stream tweets data")
             break
         except Exception as err:
             log.error("There's an error, sleep for 10 minutes")
             log.error(err)
             stream.disconnect()
             time.sleep(600)
# Example #3
 def execute(self):
     """Crawl every configured keyword in an endless loop.

     Each pass walks ``self.keyword_list``; a failed crawl is logged and
     followed by a ten-minute pause before moving on to the next keyword.
     After a complete pass the crawler sleeps ten minutes and starts over.
     """
     delay = 600
     while True:
         log.info("Star crawling back....")
         for entry in self.keyword_list:
             log.info('Crawl data for %s', entry["keyword"])
             try:
                 self.crawl(entry)
             except Exception:
                 # Log with the traceback, then back off before continuing.
                 log.error('Error in Crawling process', exc_info=True)
                 log.info("Sleeping for %ds", delay)
                 sleep(delay)
         # Pause between full passes before starting over.
         log.info("Sleeping for %ds...", delay)
         sleep(delay)
 def on_timeout(self):
     """Handle an API timeout by backing off for ten minutes.

     Called when the API reaches its rate limit; logs the event and
     blocks for 600 seconds before returning control to the caller.
     """
     log.info("API Reach its limit, sleep for 10 minutes")
     time.sleep(600)
except:
    hashtags = server["twitter-hashtags"]

# get twitter-words couchdb instance
try:
    # BUG FIX: `Server.create` is a method; the original code subscripted
    # it (`server.create["twitter-words"]`), which always raised TypeError
    # and fell through to the except branch -- the database was never
    # actually created. Call it instead.
    words = server.create("twitter-words")
except Exception:
    # Creation fails when the database already exists (couchdb raises
    # PreconditionFailed) -- in that case just open the existing one.
    # Kept deliberately broad (but no longer bare, so Ctrl-C still works).
    words = server["twitter-words"]

# get twitter-users couchdb instance
try:
    user = server.create("twitter-users")
except Exception:
    # Database already exists -- open it.
    user = server["twitter-users"]

# Process analytics for every date that has not been handled yet.
log.info("START - Processing analytics data")

# AnalyticsLog wraps the relational store; presumably it tracks which
# dates have already been analysed -- TODO confirm against its definition.
analytic_db = AnalyticsLog(database.con, database.meta)
date_list = analytic_db.fetch_unprocessed_data()

# fetch data for individual date
for date_for_analysis in date_list:
    log.info("START - Process data for %s", date_for_analysis)
    view_data = []

    # Query the CouchDB view; startkey == endkey restricts the result to
    # rows emitted for exactly this date.
    for row in db.view('_design/analytics/_view/tweets-victoria',\
                         startkey=date_for_analysis, endkey=date_for_analysis):
        view_data.append(row.value)

    log.info("Processing %d row of data", len(view_data))
import math
import couchdb

from app.sentiment_analysis import SentimentAnalysis
from app.logger import LOGGER as log

import settings

ALL_DOCS_VIEW = '_all_docs'

try:
    log.info("START db updater script")
    log.info("-----------------------")
    server = couchdb.Server(url=settings.COUCHDB_SERVER)
    db = server[settings.COUCHDB_DB]

    info = db.info()
    doc_count = info["doc_count"]
    num_per_request = 10000

    iteration = math.ceil(doc_count / num_per_request)

    for i in range(iteration):
        log.info('Run %d iteration' % i)
        for row in db.view(ALL_DOCS_VIEW,
                           limit=num_per_request,
                           skip=i * num_per_request):
            data = db.get(row.id)
            data["sentiment"] = SentimentAnalysis.get_sentiment(data["text"])
            db.save(data)
        log.info('%d iteration success')