def get(self, jobid):
    # Let's do some ORM mapping and such... nothing really
    # exciting so far.
    dbhdl = tools.getdbhandle()
    metadata = sqlalchemy.MetaData(dbhdl)
    session = sqlalchemy.orm.sessionmaker(bind=dbhdl)()

    class Job(object):
        pass

    class Result(object):
        pass

    jobs = sqlalchemy.Table('jobs', metadata, autoload=True)
    results = sqlalchemy.Table('results', metadata, autoload=True)
    sqlalchemy.orm.mapper(Job, jobs)
    sqlalchemy.orm.mapper(Result, results)

    # If the requested job does not exist then we bail out.
    job = session.query(Job).get(jobid)
    if job is None:
        raise tornado.web.HTTPError(400)

    # Let's fetch every distinct image URL encountered so far for this
    # job (the GROUP BY gives us one row per image).
    ret = []
    for result in session.query(Result.image) \
                         .filter_by(job_id=jobid) \
                         .group_by(Result.image) \
                         .all():
        ret.append('"%s"' % result.image)
    tools.putdbhandle(dbhdl)

    self.set_status(200)
    self.write('{ "images": [ %s ] }' % ',\n'.join(ret))
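
Hand-building the JSON with %-formatting works until an image URL happens to contain a double quote or a backslash, at which point the payload above stops being valid JSON. Below is a minimal sketch of the same payload built with the standard json module; the images_response helper is hypothetical, only session, Result and jobid are reused from the handler above.

import json

def images_response(session, Result, jobid):
    # Hypothetical helper, not part of the handler above: builds the same
    # payload but lets json.dumps take care of quoting and escaping the
    # URLs, so odd characters in an image URL stay valid JSON.
    images = [row.image for row in session.query(Result.image)
                                          .filter_by(job_id=jobid)
                                          .group_by(Result.image)
                                          .all()]
    return json.dumps({'images': images})
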
def _createjob(self, urls, depth):
    print >> sys.stderr, "-- received request for %s with max depth %s" \
        % (",".join(urls), depth)

    # Let's do some ORM mapping and such... nothing really
    # exciting so far.
    dbhdl = tools.getdbhandle()
    metadata = sqlalchemy.MetaData(dbhdl)
    session = sqlalchemy.orm.sessionmaker(bind=dbhdl)()

    class Job(object):
        pass

    jobs = sqlalchemy.Table('jobs', metadata, autoload=True)
    sqlalchemy.orm.mapper(Job, jobs)

    # Actually create the job in the table.
    job = Job()
    job.nburls = len(urls)
    job.posted = datetime.datetime.now()
    session.add(job)
    session.commit()
    tools.putdbhandle(dbhdl)

    # Finally push one message per URL. Each message is a small JSON
    # object carrying the job id, the URL and the maximum depth; crawlers
    # use the job id while creating task and result entries.
    mqhdl, mqchannel = tools.getmqhandle()
    for url in urls:
        tools.pushtomq(mqchannel,
                       '{ "jobid": "%d",'
                       ' "url": "%s",'
                       ' "depth": "%s" }' % (job.id, url, depth))
    tools.putmqhandle(mqhdl, mqchannel)

    self.set_header('Content-Type', 'text/plain')
    self.set_header('Location', '/{0}'.format(job.id))
    self.set_status(201)
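
On the consumer side, a crawler only has to decode that JSON payload to find out which job it is working for. Here is a minimal sketch of what that decoding might look like; the handle_message function is hypothetical, and only the jobid, url and depth keys come from the message built above.

import json

def handle_message(body):
    # Hypothetical consumer callback: 'body' is the raw JSON string pushed
    # by _createjob above. All three values are sent as strings, so the
    # numeric ones are converted back here.
    msg = json.loads(body)
    jobid = int(msg['jobid'])
    url = msg['url']
    depth = int(msg['depth'])
    return jobid, url, depth
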
def get(self, jobid):
    # Let's do some ORM mapping and such... nothing really
    # exciting so far.
    dbhdl = tools.getdbhandle()
    metadata = sqlalchemy.MetaData(dbhdl)
    session = sqlalchemy.orm.sessionmaker(bind=dbhdl)()

    class Job(object):
        pass

    class Task(object):
        pass

    jobs = sqlalchemy.Table('jobs', metadata, autoload=True)
    tasks = sqlalchemy.Table('tasks', metadata, autoload=True)
    sqlalchemy.orm.mapper(Job, jobs)
    sqlalchemy.orm.mapper(Task, tasks)

    # If the requested job does not exist then we bail out.
    job = session.query(Job).get(jobid)
    if job is None:
        raise tornado.web.HTTPError(400)

    # Let's count how many of the URLs pushed with the job description
    # have been completed so far.
    nbcompleted = session.query(Task) \
                         .filter_by(completed=True, job_id=jobid) \
                         .count()
    tools.putdbhandle(dbhdl)

    self.set_status(200)
    self.write('{ "result": {\n'
               '    "urls_completed": "%d",\n'
               '    "urls_requested": "%d",\n'
               '    "creation": "%s" } }\n'
               % (nbcompleted, job.nburls, job.posted))
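
With the Location header pointing at /<jobid>, polling a job's progress from a small script is straightforward. Here is a quick sketch of a hypothetical Python 2 client; the localhost:8888 address and the bare /<jobid> route are assumptions, not read from the server configuration.

import json
import urllib2

def job_progress(jobid, host='http://localhost:8888'):
    # Hypothetical client: fetches the status document served by the
    # handler above and returns (completed, requested) as integers.
    # Both counters are sent as strings in the JSON payload.
    resp = urllib2.urlopen('%s/%d' % (host, jobid))
    result = json.loads(resp.read())['result']
    return int(result['urls_completed']), int(result['urls_requested'])
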