def check_stuck_analysis():
    """
    If an analysis gets stuck for whatever reason, force its status to
    "failed" so callers do not need special-case handling.
    This function can be scheduled as a cron job for cleanup.
    """
    logger.info("started check_stuck_analysis")
    running_jobs = Job.objects.filter(status="running")
    logger.info(f"checking if {len(running_jobs)} jobs are stuck")
    jobs_id_stuck = []
    # consider a job stuck if it is still "running" 25 minutes after it was received
    cutoff_time = get_now() - datetime.timedelta(minutes=25)
    for running_job in running_jobs:
        if cutoff_time > running_job.received_request_time:
            logger.error(
                f"found stuck analysis, job_id:{running_job.id}. "
                f"Setting the job status to 'failed'"
            )
            jobs_id_stuck.append(running_job.id)
            general.set_job_status(running_job.id, "failed")
            running_job.finished_analysis_time = get_now()
            running_job.save(update_fields=["finished_analysis_time"])
    logger.info("finished check_stuck_analysis")
    return jobs_id_stuck

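# A minimal pytest-style sketch of the cleanup behavior. This is hypothetical
# and would live in the test suite; it assumes pytest-django is available and
# that Job can be created with only these two fields (i.e. the remaining
# model fields have defaults).
import datetime

import pytest


@pytest.mark.django_db
def test_check_stuck_analysis_fails_old_running_jobs():
    # a job received 30 minutes ago is past the 25-minute threshold
    stuck_job = Job.objects.create(
        status="running",
        received_request_time=get_now() - datetime.timedelta(minutes=30),
    )
    # a freshly received job must be left alone
    fresh_job = Job.objects.create(
        status="running",
        received_request_time=get_now(),
    )

    stuck_ids = check_stuck_analysis()

    assert stuck_job.id in stuck_ids
    assert fresh_job.id not in stuck_ids
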
def set_report_and_cleanup(job_id, report):
    """
    Append an analyzer report to the Job and, if it was the last expected
    report, set the final job status.
    """
    analyzer_name = report.get("name", "")
    job_repr = f"({analyzer_name}, job_id: #{job_id})"
    logger.info(f"STARTING set_report_and_cleanup for <-- {job_repr}.")
    job_object = None
    try:
        with transaction.atomic():
            job_object = Job.object_by_job_id(job_id, transaction=True)
            job_object.analysis_reports.append(report)
            job_object.save(update_fields=["analysis_reports"])
            if job_object.status == "failed":
                raise AlreadyFailedJobException()

        num_analysis_reports = len(job_object.analysis_reports)
        num_analyzers_to_execute = len(job_object.analyzers_to_execute)
        logger.info(
            f"REPORT: num analysis reports:{num_analysis_reports}, "
            f"num analyzers to execute:{num_analyzers_to_execute}"
            f" <-- {job_repr}."
        )
        # if this was the last expected report, set the analysis
        # to "reported" (with or without fails) or "failed"
        if num_analysis_reports == num_analyzers_to_execute:
            status_to_set = "reported_without_fails"
            failed_analyzers = sum(
                1
                for analysis_report in job_object.analysis_reports
                if not analysis_report.get("success", False)
            )
            # "failed" only if every single analyzer failed
            if failed_analyzers == num_analysis_reports:
                status_to_set = "failed"
            elif failed_analyzers >= 1:
                status_to_set = "reported_with_fails"
            set_job_status(job_id, status_to_set)
            job_object.finished_analysis_time = get_now()
            job_object.save(update_fields=["finished_analysis_time"])
    except AlreadyFailedJobException:
        logger.error(
            f"job_id {job_id} status is 'failed'. Not processing report {report}"
        )
    except Exception as e:
        logger.exception(f"job_id: {job_id}, Error: {e}")
        set_job_status(job_id, "failed", errors=[str(e)])
        # job_object can still be None if the lookup itself raised
        if job_object:
            job_object.finished_analysis_time = get_now()
            job_object.save(update_fields=["finished_analysis_time"])

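# Sketch of the report shape an analyzer worker might hand back. The payload
# below is illustrative: only the "name" and "success" keys are actually read
# by set_report_and_cleanup, and "example_analyzer" is a hypothetical name.
def example_submit_report(job_id):
    report = {
        "name": "example_analyzer",      # hypothetical analyzer name
        "success": True,                 # drives the failed/reported decision
        "report": {"verdict": "clean"},  # illustrative result payload
    }
    set_report_and_cleanup(job_id, report)
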
def ask_analysis_availability(request):
    """
    Endpoint to check whether a Job for the given md5 (and, optionally,
    the given analyzers) already exists, so clients can avoid submitting
    duplicate analysis requests.
    """
    data_received = request.data
    logger.info(
        f"ask_analysis_availability received request from {str(request.user)}. "
        f"Data: {dict(data_received)}"
    )
    serializer = serializers.JobAvailabilitySerializer(
        data=data_received, context={"request": request}
    )
    serializer.is_valid(raise_exception=True)
    serialized_data = serializer.validated_data
    analyzers, running_only, md5, minutes_ago = (
        serialized_data["analyzers"],
        serialized_data["running_only"],
        serialized_data["md5"],
        serialized_data["minutes_ago"],
    )

    if running_only:
        statuses_to_check = [models.Status.RUNNING]
    else:
        statuses_to_check = [
            models.Status.RUNNING,
            models.Status.REPORTED_WITHOUT_FAILS,
        ]

    if len(analyzers) == 0:
        query = (
            Q(md5=md5)
            & Q(status__in=statuses_to_check)
            & Q(analyzers_requested__len=0)
        )
    else:
        query = (
            Q(md5=md5)
            & Q(status__in=statuses_to_check)
            & Q(analyzers_to_execute__contains=analyzers)
        )
    if minutes_ago:
        minutes_ago_time = get_now() - timedelta(minutes=minutes_ago)
        query = query & Q(received_request_time__gte=minutes_ago_time)

    try:
        last_job_for_md5 = models.Job.objects.filter(query).latest(
            "received_request_time"
        )
        response_dict = {
            "status": last_job_for_md5.status,
            "job_id": str(last_job_for_md5.id),
            "analyzers_to_execute": last_job_for_md5.analyzers_to_execute,
        }
    except models.Job.DoesNotExist:
        response_dict = {"status": "not_available"}

    logger.debug(response_dict)
    return Response(response_dict, status=status.HTTP_200_OK)

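# Hypothetical client-side sketch (not part of this module): how a caller
# might hit the availability endpoint. The endpoint path and token auth
# scheme are assumptions; the JSON fields mirror JobAvailabilitySerializer
# as used above.
def example_ask_analysis_availability(api_url, api_token, md5):
    import requests  # local import: illustrative helper only

    response = requests.post(
        f"{api_url}/ask_analysis_availability",  # assumed endpoint path
        headers={"Authorization": f"Token {api_token}"},  # assumed auth scheme
        json={
            "md5": md5,
            "analyzers": [],  # empty list matches jobs with no requested analyzers
            "running_only": False,
            "minutes_ago": 60,
        },
    )
    # e.g. {"status": "not_available"} when no recent job matches
    return response.json()
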
def ask_analysis_result(request):
    """
    Endpoint to retrieve the status and results of a specific Job,
    based on its ID.

    :param job_id: integer Job ID (query parameter)
    :return 200: if ok
    :return 400: if job_id is missing
    :return 500: if failed
    """
    source = str(request.user)
    try:
        data_received = request.query_params
        logger.info(
            f"ask_analysis_result received request from {source}. "
            f"Data: {dict(data_received)}"
        )
        if "job_id" not in data_received:
            return Response({"error": "820"}, status=status.HTTP_400_BAD_REQUEST)
        job_id = data_received["job_id"]
        try:
            job = models.Job.objects.get(id=job_id)
        except models.Job.DoesNotExist:
            response_dict = {"status": "not_available"}
        else:
            response_dict = {
                "status": job.status,
                "results": job.analysis_reports,
                "job_id": str(job.id),
            }
            # add the elapsed time; fall back to "now" if the job
            # has not finished yet
            finished_analysis_time = getattr(job, "finished_analysis_time", "")
            if not finished_analysis_time:
                finished_analysis_time = helpers.get_now()
            elapsed_time = finished_analysis_time - job.received_request_time
            response_dict["elapsed_time_in_seconds"] = elapsed_time.total_seconds()

        logger.debug(response_dict)
        return Response(response_dict, status=status.HTTP_200_OK)
    except Exception as e:
        logger.exception(f"ask_analysis_result requester:{source} error:{e}")
        return Response(
            {"error": "error in ask_analysis_result. Check logs"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )

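# Hypothetical client-side sketch for polling a job's result. The endpoint
# path and token auth scheme are assumptions; "job_id" is the only query
# parameter the view reads.
def example_ask_analysis_result(api_url, api_token, job_id):
    import requests  # local import: illustrative helper only

    response = requests.get(
        f"{api_url}/ask_analysis_result",  # assumed endpoint path
        headers={"Authorization": f"Token {api_token}"},  # assumed auth scheme
        params={"job_id": job_id},
    )
    # on success the body carries: status, results, job_id,
    # elapsed_time_in_seconds
    return response.json()
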
def remove_old_jobs():
    """
    Remove old jobs to avoid filling the database.
    The retention period can be configured via the OLD_JOBS_RETENTION_DAYS
    secret (default: 3 days).
    """
    logger.info("started remove_old_jobs")
    retention_days = int(secrets.get_secret("OLD_JOBS_RETENTION_DAYS") or 3)
    date_to_check = get_now() - datetime.timedelta(days=retention_days)
    old_jobs = Job.objects.filter(finished_analysis_time__lt=date_to_check)
    num_jobs_to_delete = old_jobs.count()
    logger.info(f"found {num_jobs_to_delete} old jobs to delete")
    old_jobs.delete()
    logger.info("finished remove_old_jobs")
    return num_jobs_to_delete

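# Hypothetical Celery beat wiring for the two cron helpers above. The dotted
# task paths are assumptions, and check_stuck_analysis / remove_old_jobs would
# first need to be registered as Celery tasks (e.g. decorated with
# @shared_task) for this schedule to resolve them.
from celery.schedules import crontab

CELERY_BEAT_SCHEDULE = {
    "check_stuck_analysis": {
        "task": "crons.check_stuck_analysis",   # assumed dotted path
        "schedule": crontab(minute="*/10"),     # run every 10 minutes
    },
    "remove_old_jobs": {
        "task": "crons.remove_old_jobs",        # assumed dotted path
        "schedule": crontab(hour=2, minute=0),  # run daily at 02:00
    },
}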