Example #1
import random  # for sampling execution times
import time    # for timing jobs
import utils   # project helpers for Redis/RQ connections

# retrieve_image, process_image, store_to_vipr and store_to_redis are helpers
# defined elsewhere in this module.


def get_image(image_url, actually_store=True):
    """
    This is the job that gets queued when a tweet needs to be analyzed.
    """
    redis_queue = utils.get_rq_redis_conn()

    start = time.time()  # let's store some timing info
    image = retrieve_image(image_url)  # go get that image
    if image is not None:  # as long as we got a valid image back
        image = process_image(image)  # do our image processing
        if actually_store:  # if our config file says to really store the image
            key = store_to_vipr(image)  # store it to ViPR
            store_to_redis(key)  # and keep track of it in Redis
    end = time.time()  # and record how long it took
    if random.randint(1, 10) <= 5:  # about 50% of the time
        redis_queue.lpush("stats:execution-times", end - start)  # report the execution time
    redis_queue.incr("stats:tweets-processed")  # and record that we processed another tweet
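
Both stats keys are plain Redis structures, so any client can read them back for reporting. A minimal sketch, assuming only the key names used above:

import utils

redis_queue = utils.get_rq_redis_conn()

# Sampled durations were LPUSHed as strings; pull them all back and average them.
samples = [float(s) for s in redis_queue.lrange("stats:execution-times", 0, -1)]
if samples:
    print("average job time: {:.3f}s over {} samples".format(sum(samples) / len(samples), len(samples)))
print("tweets processed: {}".format(redis_queue.get("stats:tweets-processed") or 0))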
Example #2
import httplib  # Python 2 stdlib; http.client on Python 3
import utils
from TwitterAPI import TwitterAPI
from urllib3.exceptions import ProtocolError  # assumed source of ProtocolError

# watcher_logger and get_image are defined at module level; see the other examples.


def watch_stream():
    twitter_creds = utils.twitter_creds()
    redis_queue = utils.get_rq_redis_conn()
    hashtag = redis_queue.get("hashtag")
    q = utils.get_rq()

    twitter_api = TwitterAPI(
        consumer_key=twitter_creds['consumer_key'].encode('ascii', 'ignore'),
        consumer_secret=twitter_creds['consumer_secret'].encode('ascii', 'ignore'),
        access_token_key=twitter_creds['access_token'].encode('ascii', 'ignore'),
        access_token_secret=twitter_creds['token_secret'].encode('ascii', 'ignore')
    )  # set up the Twitter streaming connection

    watcher_logger.info("Waiting for tweets...")
    while True:
        try:
            for tweet in twitter_api.request('statuses/filter', {'track': hashtag}).get_iterator():  # for each incoming tweet
                if hashtag != redis_queue.get("hashtag"):  # the tracked hashtag changed in Redis
                    watcher_logger.info("Hashtag changed from {}, breaking loop to restart with new hashtag".format(hashtag))
                    hashtag = redis_queue.get("hashtag")
                    break
                redis_queue.incr("stats:tweets")  # let Redis know we got another one
                watcher_logger.debug("received tweet with tag {}".format(hashtag))
                try:
                    if tweet['entities']['media'][0]['type'] == 'photo':  # look for a photo; if it's missing this raises a KeyError, caught below
                        if 'retweeted_status' in tweet:  # retweets carry the original tweet under this key
                            watcher_logger.info("Tweet was a RT - ignoring")
                            continue
                        watcher_logger.info("Dispatching tweet ({}) with URL {}".format(hashtag, tweet['entities']['media'][0]['media_url']))
                        q.enqueue(
                            get_image,
                            tweet['entities']['media'][0]['media_url'],
                            ttl=60,
                            result_ttl=60,
                            timeout=60
                        )  # queue a job calling get_image() with the image URL and a 60s timeout
                except KeyError as e:
                    watcher_logger.debug("Caught a key error for tweet, expected behavior, so ignoring: {}".format(e))
                except Exception as e:
                    watcher_logger.critical("UNEXPECTED EXCEPTION: {}".format(e))
        except httplib.IncompleteRead as e:
            watcher_logger.warning("HTTP Exception {}".format(e))
        except ProtocolError as e:
            watcher_logger.warning("Protocol Exception {}".format(e))
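
Because the watcher re-checks the hashtag key on every tweet, retargeting the stream is just a Redis write; the loop breaks and restarts the filter with the new term. A minimal sketch, assuming only the key name used above:

import utils

redis_queue = utils.get_rq_redis_conn()
redis_queue.set("hashtag", "#redis")  # the watcher notices on the next tweet and restarts its filter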
Example #3
from rq.decorators import job  # function decorator
import dweepy  # see dweet.io
import logging, logging.config
import utils
import datetime

logging.config.dictConfig(utils.get_log_dict())

logger = logging.getLogger('vascodagama.dashboard')

# Set up our Redis and RQ connections; see twitter_watch for more details.
redis_images = utils.get_images_redis_conn()
r = utils.get_rq_redis_conn()
q = utils.get_rq()

configstuff = utils.configstuff()


@job("dashboard", connection=r, timeout=10, result_ttl=10)
def send_update(metric, value):  # send an update about a metric as needed
    logger.debug("Sending update for {}: {}".format(metric, value))
    dweepy.dweet_for(configstuff['dweet_thing'], {metric: value})


def update_dashboard():  # the primary function
    logger.info("updating")
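
Because send_update is wrapped in RQ's @job decorator, it gains a .delay() method that enqueues the call on the "dashboard" queue instead of running it inline; a worker then executes it within the 10-second timeout declared above. For example:

# Enqueue an update; an RQ worker listening on the "dashboard" queue runs it.
send_update.delay("tweets-processed", 42)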

Example #4
import json  # JSON functions
import logging, logging.config
import utils
from flask import Flask

logging.config.dictConfig(utils.get_log_dict())
worker_logger = logging.getLogger("vascodagama.worker")
watcher_logger = logging.getLogger("vascodagama.watcher")

logger = logging.getLogger('vascodagama.images')

# set up Flask
app = Flask(__name__)

# connect to Redis for image data
redis_images = utils.get_images_redis_conn()

# Set up a connection for RQ to use (each Redis connection instance only talks to one DB)
redis_queue = utils.get_rq_redis_conn()


# Gets a list of random image URLs from Redis.
def get_random_urls(count=100):
    pipe_keys = redis_images.pipeline()  # set up two pipelined batches
    pipe_urls = redis_images.pipeline()
    for i in range(0, count):  # queue up 'count' RANDOMKEY calls
        pipe_keys.randomkey()

    for key in pipe_keys.execute():  # for each of those random keys
        pipe_urls.hget(key, "url")  # fetch its URL field

    urls = pipe_urls.execute()  # the list of URLs is the result
    return list(set(urls))  # de-duplicate (RANDOMKEY can repeat) and return
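
One natural consumer of get_random_urls is a small JSON endpoint on the Flask app created above. The route below is an illustrative assumption, not part of the original module:

@app.route("/images/random")  # hypothetical endpoint
def random_images():
    return json.dumps({"urls": get_random_urls(count=20)})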
Example #5
from rq.decorators import job  # function decorator
import dweepy  # see dweet.io
import logging, logging.config
import utils
import datetime

logging.config.dictConfig(utils.get_log_dict())

logger = logging.getLogger('vascodagama.dashboard')

# Set up our Redis and RQ connections; see twitter_watch for more details.
redis_images = utils.get_images_redis_conn()
r = utils.get_rq_redis_conn()
q = utils.get_rq()

configstuff = utils.configstuff()


@job("dashboard", connection=r, timeout=10, result_ttl=10)
def send_update(metric, value):  # send an update about a metric as needed
    logger.debug("Sending update for {}: {}".format(metric, value))
    dweepy.dweet_for(configstuff['dweet_thing'], {metric: value})


def update_dashboard():  # the primary function
    logger.info("updating")

    # For each metric, collect the data and enqueue a job to send it out.
    get_queue_len()
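
get_queue_len is truncated out of this snippet. A minimal sketch of what it might do, using RQ's Queue.count property and the send_update job above; the body is a reconstruction, not the original:

def get_queue_len():
    # Hypothetical reconstruction: report the number of jobs waiting in the RQ queue.
    tweets_in_queue = q.count  # rq.Queue exposes its pending-job count as .count
    send_update.delay("tweets_in_queue", tweets_in_queue)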