def post(self, request, *args, **kwargs):
    """Dispatch a crawl control action POSTed from the crawl page.

    Recognized values of ``request.POST['action']``:
      * ``start``      -- ping celery, then queue an ache/nutch crawl task.
      * ``stop``       -- ask the running crawl to shut down cleanly.
      * ``ccadump``    -- run a Common Crawl dump for this crawl.
      * ``dump``       -- dump crawled images.
      * ``force_stop`` -- SIGKILL the crawl's process group.
      * ``status``     -- refresh and report crawl status/statistics.

    Any other action falls through to a JSON echo of the request (a
    testing aid). All JSON replies use content type application/json.
    """
    crawl_object = self.get_object()
    # Hoisted: the original looked this key up in every branch. A missing
    # 'action' key still raises, exactly as before.
    action = request.POST['action']

    def json_response(payload):
        # Local helper: every JSON branch serialized and set the content
        # type in the same way; do it once here.
        return HttpResponse(json.dumps(payload),
                            content_type="application/json")

    # Start
    if action == "start":
        # Ping celery to see if it is ready. An empty ping reply means no
        # workers answered (CELERY ERROR); a ConnectionError means the
        # redis broker is unreachable (REDIS ERROR).
        try:
            celery_status = ("READY" if celery.current_app.control.ping()
                             else "CELERY ERROR")
        except ConnectionError:
            celery_status = "REDIS ERROR"
        if celery_status in ("REDIS ERROR", "CELERY ERROR"):
            crawl_object.status = celery_status
            crawl_object.save()
            return json_response(dict(status=crawl_object.status))
        crawl_object.status = "STARTING"
        crawl_object.save()
        if crawl_object.crawler == "ache":
            ache.delay(crawl_object)
        else:
            # Nutch crawls are bounded by a round count from the form.
            crawl_object.rounds_left = int(request.POST["rounds"])
            crawl_object.save()
            nutch.delay(crawl_object)
        return json_response(dict(status=crawl_object.status))

    # Stop
    elif action == "stop":
        crawl_path = crawl_object.get_crawl_path()
        # The crawler is one of "ache"/"nutch", so these branches are
        # mutually exclusive (was two separate ifs).
        if crawl_object.crawler == "ache":
            crawl_object.status = 'STOPPED'
            crawl_object.save()
            # 9 == SIGKILL; ache has no graceful shutdown hook.
            os.killpg(crawl_object.celerytask.pid, 9)
        elif crawl_object.crawler == "nutch":
            # Let the current round finish; the crawl loop exits when it
            # sees the 'stop' marker file.
            crawl_object.rounds_left = 1
            crawl_object.save()
            touch(join(crawl_path, 'stop'))
        return json_response(dict(status="STOP SIGNAL SENT"))

    # Common Crawl Dump
    elif action == "ccadump":
        crawl_object.status = "DUMPING"
        crawl_object.save()
        # Was cca_dump(self.get_object()); crawl_object is the same row,
        # fetched moments ago -- the extra query was redundant.
        cca_dump(crawl_object)
        return HttpResponse("Success")

    # Dump Images
    elif action == "dump":
        self.dump_images()
        return HttpResponse("Success")

    # Force Stop Nutch
    elif action == "force_stop":
        touch(join(crawl_object.get_crawl_path(), 'stop'))
        os.killpg(crawl_object.celerytask.pid, 9)
        crawl_object.status = "FORCE STOPPED"
        crawl_object.save()
        return json_response(dict(status="FORCE STOPPED"))

    # Update status, statistics
    elif action == "status":
        # Error/terminal states are sticky: don't overwrite them with the
        # celery task state.
        if crawl_object.status not in ("REDIS ERROR", "CELERY ERROR",
                                       "NOT STARTED", "STOPPED",
                                       "FORCE STOPPED"):
            crawl_object.status = crawl_object.celerytask.task.status
            crawl_object.save()
        if crawl_object.crawler == "ache":
            ache_log_statistics(crawl_object)
        return json_response(dict(
            status=crawl_object.status,
            harvest_rate=crawl_object.harvest_rate,
            pages_crawled=crawl_object.pages_crawled,
            rounds_left=crawl_object.rounds_left,
        ))

    # Unknown action: reflect the POST back as JSON (testing aid).
    return json_response(dict(args=args, kwargs=kwargs, post=request.POST))
def post(self, request, *args, **kwargs):
    """Route a POSTed crawl-control ``action`` to the matching handler.

    Supported actions: start, stop, dump, force_stop, status. Unknown
    actions get the request echoed back as JSON for testing.
    """
    crawl = self.get_object()
    action = request.POST['action']

    if action == "start":
        # Mark the crawl as starting, then queue the right celery task.
        crawl.status = "STARTING"
        crawl.save()
        if crawl.crawler == "ache":
            ache.delay(crawl)
        else:
            crawl.rounds_left = int(request.POST["rounds"])
            crawl.save()
            nutch.delay(crawl)
        body = json.dumps(dict(status="STARTING"))
        return HttpResponse(body, content_type="application/json")

    if action == "stop":
        crawl_path = crawl.get_crawl_path()
        if crawl.crawler == "ache":
            # ache: record the stop, then SIGKILL (9) its process group.
            crawl.status = 'STOPPED'
            crawl.save()
            os.killpg(crawl.crawltask.pid, 9)
        if crawl.crawler == "nutch":
            # nutch: let the final round finish, then drop a stop marker.
            crawl.rounds_left = 1
            crawl.save()
            touch(join(crawl_path, 'stop'))
        body = json.dumps(dict(status="STOP SIGNAL SENT"))
        return HttpResponse(body, content_type="application/json")

    if action == "dump":
        self.dump_images()
        return HttpResponse("Success")

    if action == "force_stop":
        touch(join(crawl.get_crawl_path(), 'stop'))
        os.killpg(crawl.crawltask.pid, 9)
        crawl.status = "FORCE STOPPED"
        crawl.save()
        body = json.dumps(dict(status="FORCE STOPPED"))
        return HttpResponse(body, content_type="application/json")

    if action == "status":
        # Terminal statuses are sticky; otherwise mirror the celery task.
        if crawl.status not in ["NOT STARTED", "STOPPED", "FORCE STOPPED"]:
            crawl.status = crawl.crawltask.task.status
            crawl.save()
        if crawl.crawler == "ache":
            ache_log_statistics(crawl)
        body = json.dumps(dict(
            status=crawl.status,
            harvest_rate=crawl.harvest_rate,
            pages_crawled=crawl.pages_crawled,
            rounds_left=crawl.rounds_left,
        ))
        return HttpResponse(body, content_type="application/json")

    # TESTING: reflect the POST request back to the caller.
    body = json.dumps(dict(args=args, kwargs=kwargs, post=request.POST))
    return HttpResponse(body, content_type="application/json")
def post(self, request, *args, **kwargs):
    """Handle crawl control actions POSTed from the crawl page.

    Actions: ``start`` (ping celery, queue the crawl task), ``stop``
    (graceful shutdown), ``ccadump`` (Common Crawl dump), ``dump``
    (image dump), ``force_stop`` (SIGKILL the process group), and
    ``status`` (refresh/report statistics). Anything else is echoed
    back as JSON (testing aid).
    """
    crawl_object = self.get_object()
    # Hoisted: looked up once instead of in every branch; a missing
    # 'action' key still raises, matching the original behavior.
    action = request.POST['action']

    def json_response(payload):
        # All JSON branches serialize and set the content type the same way.
        return HttpResponse(json.dumps(payload),
                            content_type="application/json")

    # Start
    if action == "start":
        # Ping celery to see if it is ready. An empty ping reply means no
        # workers responded (CELERY ERROR); a ConnectionError means the
        # redis broker is down (REDIS ERROR).
        try:
            celery_status = ("READY" if celery.current_app.control.ping()
                             else "CELERY ERROR")
        except ConnectionError:
            celery_status = "REDIS ERROR"
        if celery_status in ("REDIS ERROR", "CELERY ERROR"):
            crawl_object.status = celery_status
            crawl_object.save()
            return json_response(dict(status=crawl_object.status))
        crawl_object.status = "STARTING"
        crawl_object.save()
        if crawl_object.crawler == "ache":
            ache.delay(crawl_object)
        else:
            # Nutch crawls are bounded by a round count from the form.
            crawl_object.rounds_left = int(request.POST["rounds"])
            crawl_object.save()
            nutch.delay(crawl_object)
        return json_response(dict(status=crawl_object.status))

    # Stop
    elif action == "stop":
        crawl_path = crawl_object.get_crawl_path()
        # Crawler kinds are mutually exclusive (was two separate ifs).
        if crawl_object.crawler == "ache":
            crawl_object.status = "STOPPED"
            crawl_object.save()
            # 9 == SIGKILL; ache offers no graceful shutdown.
            os.killpg(crawl_object.celerytask.pid, 9)
        elif crawl_object.crawler == "nutch":
            # Nutch finishes its current round, then exits when it sees
            # the 'stop' marker file.
            crawl_object.status = "FINISHING"
            crawl_object.rounds_left = 1
            crawl_object.save()
            touch(join(crawl_path, 'stop'))
        return json_response(dict(status="STOP SIGNAL SENT"))

    # Common Crawl Dump
    elif action == "ccadump":
        crawl_object.status = "DUMPING"
        crawl_object.save()
        # Was cca_dump(self.get_object()); crawl_object is that same row,
        # fetched at the top -- the extra query was redundant.
        cca_dump(crawl_object)
        return HttpResponse("Success")

    # Dump Images
    elif action == "dump":
        self.dump_images()
        return HttpResponse("Success")

    # Force Stop Nutch
    elif action == "force_stop":
        touch(join(crawl_object.get_crawl_path(), 'stop'))
        os.killpg(crawl_object.celerytask.pid, 9)
        # This literal was split across a line break in the mangled source;
        # reconstructed as the single status string "FORCE STOPPED".
        crawl_object.status = "FORCE STOPPED"
        crawl_object.save()
        return json_response(dict(status="FORCE STOPPED"))

    # Update status, statistics
    elif action == "status":
        # Do not update the status if the current status is any of the
        # following. This prevents errors or interface problems when
        # checking the status of a celery task.
        no_go_statuses = [
            "FINISHING", "STOPPING", "REDIS ERROR", "CELERY ERROR",
            "NOT STARTED", "STOPPED", "FORCE STOPPED",
        ]
        if crawl_object.status not in no_go_statuses:
            crawl_object.status = crawl_object.celerytask.task.status
            crawl_object.save()
        if crawl_object.crawler == "ache":
            ache_log_statistics(crawl_object)
        return json_response(dict(
            status=crawl_object.status,
            harvest_rate=crawl_object.harvest_rate,
            pages_crawled=crawl_object.pages_crawled,
            rounds_left=crawl_object.rounds_left,
        ))

    # Unknown action: reflect the POST back as JSON (testing aid).
    return json_response(dict(args=args, kwargs=kwargs, post=request.POST))
def post(self, request, *args, **kwargs):
    """Crawl control endpoint: dispatch on ``request.POST['action']``.

    Handles start / stop / ccadump / dump / status; any other action is
    reflected back as JSON (testing aid).
    """
    crawl = self.get_object()
    action = request.POST['action']

    def reply(payload):
        # Uniform JSON response used by every control branch.
        return HttpResponse(json.dumps(payload),
                            content_type="application/json")

    if action == "start":
        # Try to ping celery to see if it is ready. If the response is an
        # empty list, no workers answered (CELERY ERROR). If there is an
        # error connecting to redis, the status is REDIS ERROR.
        try:
            workers = celery.current_app.control.ping()
            celery_status = "READY" if workers else "CELERY ERROR"
        except ConnectionError:
            celery_status = "REDIS ERROR"
        if celery_status in ["REDIS ERROR", "CELERY ERROR"]:
            crawl.status = celery_status
            crawl.save()
            return reply(dict(status=crawl.status))
        crawl.status = "STARTING"
        crawl.save()
        if crawl.crawler == "ache":
            ache.delay(crawl)
        else:
            crawl.rounds_left = int(request.POST["rounds"])
            crawl.save()
            nutch.delay(crawl)
        return reply(dict(status=crawl.status))

    if action == "stop":
        # Kept even though no longer used by the nutch path below.
        crawl_path = crawl.get_crawl_path()
        if crawl.crawler == "ache":
            crawl.status = "STOPPED"
            crawl.save()
            os.killpg(crawl.celerytask.pid, 9)  # 9 == SIGKILL
        if crawl.crawler == "nutch":
            crawl.status = "STOPPING"
            crawl.save()
        # NOTE(review): reconstructed from flattened source -- the
        # "STOPPING" reply appears shared by both crawler kinds; confirm.
        return reply(dict(status="STOPPING"))

    if action == "ccadump":
        crawl.status = "DUMPING"
        crawl.save()
        cca_dump(self.get_object())
        crawl.status = "SUCCESS"
        crawl.save()
        return HttpResponse("Success")

    if action == "dump":
        # TODO - restore dump_images
        return HttpResponse("Success")

    if action == "status":
        # Do not update the status if the current status is any of the
        # following; prevents errors or interface problems when checking
        # the status of a celery task. Nutch is always excluded.
        no_go_statuses = [
            "FINISHING", "STOPPING", "REDIS ERROR", "CELERY ERROR",
            "NOT STARTED", "STOPPED", "FORCE STOPPED",
        ]
        if crawl.status not in no_go_statuses and crawl.crawler != 'nutch':
            crawl.status = crawl.celerytask.task.status
            crawl.save()
        if crawl.crawler == "ache":
            ache_log_statistics(crawl)
        return reply(dict(
            status=crawl.status,
            harvest_rate=crawl.harvest_rate,
            pages_crawled=crawl.pages_crawled,
            rounds_left=crawl.rounds_left,
        ))

    # Reflect the POST request back (testing aid).
    return reply(dict(args=args, kwargs=kwargs, post=request.POST))