# NOTE(review): this physical line holds what looks like many statements whose
# line breaks were lost. As written, only `import lib.eventapp as eventapp` is
# code; everything from the first `#` onward is comment text.
# The comment text appears to: import the page/image handlers, import the
# shared `context`, store `debug` and the 'scraper' config in it, read the
# 'knobs' settings (threads_per_stage, forks, threaded, multiprocess), and
# begin an `EventApp('blog_scraper', config, context, ...` call.
# That call is cut off inside its argument list, so the statements cannot be
# restored from this line alone.
# NOTE(review): the text uses `debug` and `EventApp`, but the only name bound
# here is the module alias `eventapp` -- confirm where they are defined.
import lib.eventapp as eventapp # bring in the event handler functions import handlers.page as page import handlers.image as image # read in the context from context import context # add our debug flag to config context.add('debug', debug) # set our config into the context config = context.get('get_config')(debug, 'scraper') context.add('config', config) # configure eventapp to have the # of threads / forks we want thread_count = config.get('knobs').get('threads_per_stage') fork_count = config.get('knobs').get('forks') threaded = config.get('knobs').get('threaded') multiprocess = config.get('knobs').get('multiprocess') # set up our event app app = EventApp('blog_scraper', config, context, # HANDLERS
# NOTE(review): this whole physical line starts with `#`, so as written it is
# a single comment and none of the statements inside it execute. It looks like
# a script whose line breaks were lost.
# The text appears to: compute the project root from `__file__` and put it at
# the front of `sys.path`, read a `debug` flag from `sys.argv`, load the
# 'scraper' config into `context`, fetch `Image`, `SimilarImage` and
# `compare_vhash` from `context`, and open `def do_compare(im):`.
# That definition has no body in this excerpt, so the code cannot be restored
# from this line alone.
# NOTE(review): `MIN_DISTANCE` is assigned from config.get('similar',
# 'min_distance') and then immediately reassigned to 5, so the literal would
# win; the TODO suggests the config value is the eventual goal -- confirm.
# NOTE(review): `print 'debug: %s' % debug` is Python 2 statement syntax, and
# `dirname`, `abspath` and `sys` are not imported in the text shown here.
# what dir is this file in ? here = dirname(abspath(__file__)) # update our python path to be at root of project base = dirname(here) sys.path.insert(0, base) # we default to production debug = 'debug' in sys.argv print 'debug: %s' % debug # CONTEXT from context import context # set our config into the context config = context.get('get_config')(debug, 'scraper') context.add('config', config) # TODO: add to config MIN_DISTANCE = config.get('similar', 'min_distance') MIN_DISTANCE = 5 Image = context.get('get_Image')() SimilarImage = context.get('get_SimilarImage')() compare_vhash = context.get('compare_vhash') # go through each image's vhash finding # images it's similar to other images def do_compare(im):
# NOTE(review): this whole physical line starts with `#`, so as written it is
# a single comment and none of the statements inside it execute. It looks like
# a script whose line breaks were lost.
# The text appears to: insert abspath('.') at the front of `sys.path`, read a
# `debug` flag from `sys.argv`, load the 'update_attrs' config into `context`,
# query the Image collection for documents with 'vhash' None and 'downloaded'
# True, create `ThreadPool(5)`, and begin `def do_vhash(i, image):`.
# That definition is cut off partway through its body, so the code cannot be
# restored from this line alone.
# NOTE(review): the message prints dirname(abspath('.')) but the path actually
# inserted is abspath('.') -- the log and the action disagree; confirm which
# directory is intended.
# NOTE(review): `ThreadPool` and `total_images` are used but not defined in
# the text shown here, and the `print` statements are Python 2 syntax.
# updating path to be root of project from os.path import dirname, abspath import sys print 'adding: %s' % dirname(abspath('.')) sys.path.insert(0, abspath('.')) # bring in the global context from context import context # we default to production debug = 'debug' in sys.argv print 'debug: %s' % debug # set our config into the context config = context.get('get_config')(debug, 'update_attrs') context.add('config', config) # compute each images vhash compute = context.get('compute_vhash') get_Image = context.get('get_Image') images = get_Image().collection.find({'vhash': None, 'downloaded': True}) # start up a worker pool pool = ThreadPool(5) def do_vhash(i, image): print ('\n[compute vhash] (%s/%s)\t%s' % ( i, total_images-i, image.short_hash)), image_data = context.create_partial(image.get_data)()
# Application setup for the 'image-scraper' Flask app: imports the shared
# context, builds the Flask app object, loads the 'scraper' config and
# registers the config, a revent client and the debug flag on the context.
#
# The statements were previously collapsed onto one physical line behind a
# leading `#`, which made the whole section a comment; the line breaks are
# restored here with the statement order unchanged.
#
# NOTE(review): `debug` is read below but not assigned in this section, so it
# must already be bound when these statements run -- confirm against the part
# of the file that precedes this section.

# CONTEXT
from context import context

# and our helpers
import lib.helpers as helpers

# FLASK it up !
from flask import Flask, Response, redirect

# create our app
app = Flask('image-scraper')

# set our config into the context
config = context.get('get_config')(debug, 'scraper')
context.add('config', config)

# instantiate a revent client, passing the 'revent' config entry as
# keyword arguments
revent_client = context.get('get_revent')(**config.get('revent'))

# add the revent client to context
context.add('revent', revent_client)

# add our debug flag to the context
context.add('debug', debug)

# OUR WSGI HANDLERS