def get(self, crawlID):
    """Execute one crawl task: download the dataset's VoID file, detect
    changes against the stored hash, and record status/timing on the Crawl.

    Args:
        crawlID: string id of the Crawl entity to process (task URL param).
    """
    logging.info("Starting execution of task %s", crawlID)
    # NOTE: long() — this file targets Python 2 (App Engine runtime).
    crawl = Crawl.get_by_id(long(crawlID))
    startTime = time.time()
    crawl.status = "RUNNING"
    crawl.save()

    # Download the VoID description for the crawl's dataset.
    voidURI = crawl.dataset.voidURI
    logging.info("Downloading VoID from %s", voidURI)
    result = urlfetch.fetch(voidURI)
    logging.info("Completed download from %s with HTTP %s",
                 voidURI, result.status_code)

    if result.status_code != 200:
        # Download failed: mark the crawl as errored, no change detected.
        crawl.changeDetected = False
        crawl.status = "ERROR"
        crawl.message = "VoID Download Failed"
    else:
        # Compare the downloaded VoID against the hash stored on the dataset.
        voidHash = self.computeHash(result.content)
        if crawl.dataset.voidHash != voidHash:
            # VoID changed - persist the new hash before processing.
            logging.info("VoID file at %s changed", voidURI)
            crawl.dataset.voidHash = voidHash
            crawl.dataset.save()
            self.processVoID(result.content, crawl)
            # No matter what status processVoID set for the data dumps,
            # overwrite with info about the VoID change.
            crawl.changeDetected = True
            crawl.message = "VoID file changed"
        else:
            # No change in VoID; still process the data dumps.
            logging.info("No changes in VoID file %s", voidURI)
            self.processVoID(result.content, crawl)

    # Complete the task log with timing info.
    crawl.finishedAt = datetime.datetime.utcnow()
    crawl.duration = int(time.time() - startTime)
    crawl.save()
def view_crawl(self, mid, cid):
    """Render the detail page for one crawl belonging to a robot.

    Args:
        mid: robot (machine) entity id, scoped to the current user.
        cid: crawl entity id, scoped to that robot.
    """
    parent_robot = Robot.get_by_id(int(mid), parent=self.current_user.key)
    selected_crawl = Crawl.get_by_id(int(cid), parent=parent_robot.key)
    self.render_response(
        'robot/crawl.html',
        robot=parent_robot,
        crawl=selected_crawl,
        jobs=selected_crawl.jobs,
    )