예제 #1
0
import lib.eventapp as eventapp


# bring in the event handler functions
import handlers.page as page
import handlers.image as image


# read in the context
from context import context

# Publish the debug flag on the shared context (not on the config object).
# NOTE(review): `debug` is not defined in the visible part of this script --
# confirm it is set before this point.
context.add('debug', debug)

# Fetch the 'get_config' callable from the context, call it with the debug
# flag and the 'scraper' name, and publish the result under 'config'.
config = context.get('get_config')(debug, 'scraper')
context.add('config', config)


# Concurrency knobs for the event app, all read from the 'knobs' entry of
# the config: the thread / fork counts first, then the threaded and
# multiprocess settings.
thread_count, fork_count = (
    config.get('knobs').get('threads_per_stage'),
    config.get('knobs').get('forks'),
)
threaded, multiprocess = (
    config.get('knobs').get('threaded'),
    config.get('knobs').get('multiprocess'),
)


# set up our event app
app = EventApp('blog_scraper', config, context,

               # HANDLERS
예제 #2
0
# Absolute path of the directory that contains this file.
here = dirname(abspath(__file__))

# Put the parent of that directory at the front of sys.path so it is searched
# first by the imports that follow; presumably this is the project root.
base = dirname(here)
sys.path.insert(0, base)

# Debug mode is on only when the literal argument 'debug' appears on the
# command line; otherwise we default to production.
debug = 'debug' in sys.argv
print 'debug: %s' % debug

# CONTEXT
from context import context

# Fetch the 'get_config' callable from the context, call it with the debug
# flag and the 'scraper' name, and publish the result under 'config'.
config = context.get('get_config')(debug, 'scraper')
context.add('config', config)

# TODO: add to config
# NOTE(review): the value read from the config on the first line is
# discarded -- the very next line rebinds MIN_DISTANCE to the hard-coded 5.
# Presumably the literal is a stop-gap until 'min_distance' exists in the
# config; drop one of the two assignments once that is settled.
MIN_DISTANCE = config.get('similar', 'min_distance')
MIN_DISTANCE = 5

# Resolve helpers from the context: the 'get_Image' / 'get_SimilarImage'
# entries are called immediately and their results kept, while
# 'compare_vhash' is kept as fetched (it is not called here).
Image = context.get('get_Image')()
SimilarImage = context.get('get_SimilarImage')()
compare_vhash = context.get('compare_vhash')

# go through each image's vhash, finding
# the other images it is similar to


def do_compare(im):
예제 #3
0
# updating path to be root of project
from os.path import dirname, abspath
import sys
# Put the current working directory at the front of sys.path and report the
# path that is actually inserted, so the log line matches what was added.
# NOTE(review): the comment above the imports says "root of project" -- that
# only holds when the script is launched from the project root; confirm.
print('adding: %s' % abspath('.'))
sys.path.insert(0, abspath('.'))

# bring in the global context
from context import context

# Debug mode is on only when the literal argument 'debug' appears on the
# command line; otherwise we default to production.
debug = 'debug' in sys.argv
print 'debug: %s' % debug

# Fetch the 'get_config' callable from the context, call it with the debug
# flag and the 'update_attrs' name, and publish the result under 'config'.
config = context.get('get_config')(debug, 'update_attrs')
context.add('config', config)

# Compute each image's vhash: grab the 'compute_vhash' and 'get_Image'
# entries from the context, then query the collection behind get_Image() for
# records whose 'vhash' is None and whose 'downloaded' is True.
# NOTE(review): `.collection.find({...})` looks like a MongoDB-style query --
# confirm how a None filter treats records that lack the field entirely.
compute = context.get('compute_vhash')
get_Image = context.get('get_Image')
images = get_Image().collection.find({'vhash': None,
                                      'downloaded': True})

# Start up a worker pool; the argument 5 is presumably the number of workers.
# NOTE(review): ThreadPool is not imported in the visible part of this
# script -- confirm where it comes from.
pool = ThreadPool(5)

def do_vhash(i, image):
    print ('\n[compute vhash] (%s/%s)\t%s' % (
                i, total_images-i, image.short_hash)),
    image_data = context.create_partial(image.get_data)()
예제 #4
0
# CONTEXT
from context import context


# and our helpers
import lib.helpers as helpers

# FLASK it up !
from flask import Flask, Response, redirect

# Create the Flask application object.
# NOTE(review): Flask treats its first argument as the import name of the
# application's module/package; 'image-scraper' is not an importable module
# name -- confirm resource lookup still behaves as intended.
app = Flask('image-scraper')

# Fetch the 'get_config' callable from the context, call it with the debug
# flag and the 'scraper' name, and publish the result under 'config'.
# NOTE(review): `debug` is not defined in the visible part of this script --
# confirm it is set before this point.
config = context.get('get_config')(debug, 'scraper')
context.add('config', config)

# Instantiate a revent client: the 'get_revent' callable from the context is
# called with the config's 'revent' entry expanded into keyword arguments.
revent_client = context.get('get_revent')(**config.get('revent'))

# Publish the revent client on the context.
context.add('revent', revent_client)

# Publish the debug flag on the shared context (not on the config object).
context.add('debug', debug)



# OUR WSGI HANDLERS