def process(self):
    """Advance self.job one step through its lifecycle state machine.

    CREATED -> IN_PROGRESS: mark the job as started (work begins on the
        next tick).
    IN_PROGRESS: increment retry_count and run self._process(); on success
        the job moves to WAITING_FOR_CHILDREN, on failure the job is marked
        FAILED (with an error message attached) once the retry budget is
        exhausted.
    WAITING_FOR_CHILDREN: poll child jobs via self._check_children().

    Always logs the resulting status, then unlocks and persists the job.
    """
    if self.job.status == JobStatus.CREATED:
        self.job.status = JobStatus.IN_PROGRESS
    elif self.job.status == JobStatus.IN_PROGRESS:
        self.job.retry_count = self.job.retry_count + 1
        try:
            self._process()
            self.job.status = JobStatus.WAITING_FOR_CHILDREN
        except Exception as e:
            # ETL-specific exceptions carry a machine-readable error code
            if isinstance(e, ETLExceptions):
                message = {"message": str(e), "code": e.code}
            else:
                message = {"message": str(e)}
            # NOTE: the error message is only persisted on the final attempt;
            # intermediate failures are retried silently (original behavior).
            # `>=` (not `==`) so a retry_count that ever overshoots max_retry
            # still terminates in FAILED instead of retrying forever.
            if self.job.retry_count >= self.job.max_retry:
                self.job.status = JobStatus.FAILED
                self.job.message = message
                self._job_failed()
    elif self.job.status == JobStatus.WAITING_FOR_CHILDREN:
        self._check_children()
    logger.info("Job %s in status: %s" % (str(self.job.id), JobStatus(self.job.status).name))
    self._unlock()
    self._save()
def test_fetch_samples1(self):
    """
    Test fetching samples for a request from IGO LIMS
    Should import Pooled Normal samples automatically
    TODO: Mock LIMS API for this test and then remove skip
    """
    # the database must start out empty for the counts below to be meaningful
    self.assertTrue(len(File.objects.all()) == 0)
    self.assertTrue(len(FileMetadata.objects.all()) == 0)
    self.assertTrue(len(Job.objects.all()) == 0)

    request_id = "10075_D"
    child_jobs = fetch_samples(request_id=request_id)

    # fetch_samples should have registered one Job per returned child id
    created_jobs = Job.objects.all()
    created_ids = [j.id for j in created_jobs]
    self.assertTrue(len(created_jobs) == len(child_jobs))
    self.assertTrue(len(created_jobs) == 17)
    for child in child_jobs:
        self.assertTrue(UUID(child) in created_ids)

    # the scheduler must run several times for every job to reach a terminal
    # state
    # TODO: need to split apart the IGO LIMS query from the sample import logic, so we can pass in mock JSON blob representing expected IGO LIMS API response to avoid having to actually query the real API for testing
    print(">>> running job scheduler")
    scheduler()
    scheduler()
    scheduler()
    print(">>> job scheduler complete")

    # every pooled-normal import job should have completed successfully
    pooled_normal_runs = Job.objects.filter(
        run='beagle_etl.jobs.lims_etl_jobs.create_pooled_normal').all()
    for pn_job in pooled_normal_runs:
        print("%s %s" % (pn_job.run, JobStatus(pn_job.status).name))
        self.assertTrue(pn_job.status == JobStatus.COMPLETED)

    # imported files plus pooled normals should now be in the database
    self.assertTrue(len(File.objects.all()) == 22)
    self.assertTrue(len(FileMetadata.objects.all()) == 22)

    import_files = File.objects.filter(file_group=settings.IMPORT_FILE_GROUP)
    import_files_metadata = FileMetadata.objects.filter(
        file__in=[f.id for f in import_files])
    pooled_normal_files = File.objects.filter(
        file_group=settings.POOLED_NORMAL_FILE_GROUP)
    pooled_normal_files_metadata = FileMetadata.objects.filter(
        file__in=[f.id for f in pooled_normal_files])
    self.assertTrue(len(import_files) == 10)
    self.assertTrue(len(import_files_metadata) == 10)
    self.assertTrue(len(pooled_normal_files) == 12)
    self.assertTrue(len(pooled_normal_files_metadata) == 12)
def request_callback(request_id, job_group=None, job_group_notifier=None):
    """Post-import callback for a LIMS request: route it to an operator.

    Inspects the imported jobs/files for *request_id*, emits notification
    events (CI review, admin hold, disabled assay, etc.) and, when the
    request's recipe maps to an active operator, schedules pipeline creation
    via create_jobs_from_request.

    :param request_id: IGO request id, e.g. "10075_D"
    :param job_group: JobGroup id the import jobs belong to
    :param job_group_notifier: JobGroupNotifier id used to tag notifications
    :return: empty list (callback contract)
    :raises FailedToSubmitToOperatorException: when the recipe is missing or
        no operator is configured for it
    """
    jgn = None
    try:
        jgn = JobGroupNotifier.objects.get(id=job_group_notifier)
        logger.debug("[RequestCallback] JobGroup id: %s", job_group)
    except JobGroupNotifier.DoesNotExist:
        logger.debug("[RequestCallback] JobGroup not set")
    job_group_notifier_id = str(jgn.id) if jgn else None
    assays = ETLConfiguration.objects.first()
    recipe = LIMSClient.get_request_samples(request_id).get("recipe", None)
    # For WES assays, flag the request when any igocomplete sample job failed
    if (not all([
            JobStatus(job["status"]) == JobStatus.COMPLETED
            for job in Job.objects.filter(job_group=job_group,
                                          run=TYPES["SAMPLE"],
                                          args__igocomplete=True).values("status")
    ]) and recipe in settings.WES_ASSAYS):
        wes_job_failed = WESJobFailedEvent(job_group_notifier_id, recipe)
        send_notification.delay(wes_job_failed.to_dict())
    if not recipe:
        raise FailedToSubmitToOperatorException(
            "Not enough metadata to choose the operator for requestId:%s" %
            request_id)
    # Unrecognized recipe: mark for CI review and stop
    if recipe not in assays.all_recipes:
        ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(ci_review_e)
        set_unknown_assay_label = SetLabelEvent(
            job_group_notifier_id, "unrecognized_assay").to_dict()
        send_notification.delay(set_unknown_assay_label)
        unknown_assay_event = UnknownAssayEvent(job_group_notifier_id,
                                                recipe).to_dict()
        send_notification.delay(unknown_assay_event)
        return []
    # Recipe on the hold list: place the request on admin hold and stop
    if recipe in assays.hold_recipes:
        admin_hold_event = AdminHoldEvent(job_group_notifier_id).to_dict()
        send_notification.delay(admin_hold_event)
        custom_capture_event = CustomCaptureCCEvent(job_group_notifier_id,
                                                    recipe).to_dict()
        send_notification.delay(custom_capture_event)
        return []
    # Recipe disabled for CI: notify and stop
    if recipe in assays.disabled_recipes:
        not_for_ci = NotForCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(not_for_ci)
        disabled_assay_event = DisabledAssayEvent(job_group_notifier_id,
                                                  recipe).to_dict()
        send_notification.delay(disabled_assay_event)
        return []
    # No files imported at all for this request: admin hold and stop
    if len(
            FileRepository.filter(metadata={
                "requestId": request_id
            }, values_metadata="recipe").all()) == 0:
        no_samples_event = AdminHoldEvent(job_group_notifier_id).to_dict()
        send_notification.delay(no_samples_event)
        return []
    # Any failed job in the group requires CI review (but does not stop
    # operator submission)
    if not all([
            JobStatus(job["status"]) == JobStatus.COMPLETED
            for job in Job.objects.filter(job_group=job_group).values("status")
    ]):
        ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(ci_review_e)
    lab_head_email = FileRepository.filter(
        metadata={
            "requestId": request_id
        }, values_metadata="labHeadEmail").first()
    # Best-effort check: flag requests whose lab head is outside MSKCC
    try:
        if lab_head_email.split("@")[1] != "mskcc.org":
            event = ExternalEmailEvent(job_group_notifier_id,
                                       request_id).to_dict()
            send_notification.delay(event)
    except Exception:
        logger.error("Failed to check labHeadEmail")
    # Tumor-less requests: notify, and for configured assays hold and stop
    if len(
            FileRepository.filter(metadata={
                "requestId": request_id,
                "tumorOrNormal": "Tumor"
            })) == 0:
        only_normal_samples_event = OnlyNormalSamplesEvent(
            job_group_notifier_id, request_id).to_dict()
        send_notification.delay(only_normal_samples_event)
        if recipe in settings.ASSAYS_ADMIN_HOLD_ONLY_NORMALS:
            admin_hold_event = AdminHoldEvent(job_group_notifier_id).to_dict()
            send_notification.delay(admin_hold_event)
            return []
    operators = Operator.objects.filter(recipes__overlap=[recipe])
    if not operators:
        # TODO: Import ticket will have CIReviewNeeded
        msg = "No operator defined for requestId %s with recipe %s" % (
            request_id, recipe)
        logger.error(msg)
        e = OperatorRequestEvent(job_group_notifier_id,
                                 "[CIReviewEvent] %s" % msg).to_dict()
        send_notification.delay(e)
        ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
        send_notification.delay(ci_review_e)
        raise FailedToSubmitToOperatorException(msg)
    for operator in operators:
        if not operator.active:
            msg = "Operator not active: %s" % operator.class_name
            logger.info(msg)
            e = OperatorRequestEvent(job_group_notifier_id,
                                     "[CIReviewEvent] %s" % msg).to_dict()
            send_notification.delay(e)
            error_label = SetLabelEvent(job_group_notifier_id,
                                        "operator_inactive").to_dict()
            send_notification.delay(error_label)
            ci_review_e = SetCIReviewEvent(job_group_notifier_id).to_dict()
            send_notification.delay(ci_review_e)
        else:
            logger.info("Submitting request_id %s to %s operator" %
                        (request_id, operator.class_name))
            # Report partial vs full import completion before scheduling runs
            if Job.objects.filter(job_group=job_group,
                                  args__request_id=request_id,
                                  run=TYPES["SAMPLE"],
                                  status=JobStatus.FAILED).all():
                partialy_complete_event = ETLImportPartiallyCompleteEvent(
                    job_notifier=job_group_notifier_id).to_dict()
                send_notification.delay(partialy_complete_event)
            else:
                complete_event = ETLImportCompleteEvent(
                    job_notifier=job_group_notifier_id).to_dict()
                send_notification.delay(complete_event)
            create_jobs_from_request.delay(request_id, operator.id, job_group)
    return []
def _generate_ticket_decription(self):
    # NOTE(review): "decription" is a typo for "description", but the name is
    # the method's public interface and is kept as-is.
    """Build and send the ETL import summary and job-links notifications.

    Walks every Job in this job's group, classifies them by run type
    (REQUEST / SAMPLE / POOLED_NORMAL), and emits two events via
    send_notification: an ETLImportEvent summarizing the import and an
    ETLJobsLinksEvent listing all per-job links.
    """
    samples_completed = set()
    samples_failed = set()
    all_jobs = []
    request_jobs = []
    sample_jobs = []
    pooled_normal_jobs = []
    jobs = Job.objects.filter(job_group=self.job.job_group.id).all()
    for job in jobs:
        # Track which sample ids finished vs failed
        if job.run == TYPES['SAMPLE']:
            if job.status == JobStatus.COMPLETED:
                samples_completed.add(job.args['sample_id'])
            elif job.status == JobStatus.FAILED:
                samples_failed.add(job.args['sample_id'])
        # Build (id, status, key, message, sample_id) rows per run type;
        # REQUEST rows carry no status/sample_id
        if job.run == TYPES['SAMPLE']:
            sample_jobs.append((str(job.id), JobStatus(job.status).name,
                                self.get_key(job.run), job.message or "",
                                job.args.get('sample_id', '')))
        elif job.run == TYPES['REQUEST']:
            request_jobs.append(
                (str(job.id), '', self.get_key(job.run), job.message or "",
                 ''))
        elif job.run == TYPES['POOLED_NORMAL']:
            pooled_normal_jobs.append(
                (str(job.id), JobStatus(job.status).name,
                 self.get_key(job.run), job.message or "",
                 job.args.get('sample_id', '')))
    # Ticket order: request job first, then samples, then pooled normals
    all_jobs.extend(request_jobs)
    all_jobs.extend(sample_jobs)
    all_jobs.extend(pooled_normal_jobs)
    # Most recent SAMPLE job carries the freshest request metadata
    request_metadata = Job.objects.filter(args__request_id=self.job.args['request_id'],
                                          run=TYPES['SAMPLE']).order_by('-created_date').first()
    number_of_tumors = FileRepository.filter(
        metadata={'requestId': self.job.args['request_id'],
                  'tumorOrNormal': 'Tumor'},
        values_metadata='sampleId').count()
    number_of_normals = FileRepository.filter(
        metadata={'requestId': self.job.args['request_id'],
                  'tumorOrNormal': 'Normal'},
        values_metadata='sampleId').count()
    # Defaults used when no SAMPLE job (and hence no metadata) exists
    data_analyst_email = ""
    data_analyst_name = ""
    investigator_email = ""
    investigator_name = ""
    lab_head_email = ""
    lab_head_name = ""
    pi_email = ""
    project_manager_name = ""
    recipe = ""
    qc_access_emails = ""
    if request_metadata:
        # assumes request_metadata.args['request_metadata'] contains all of
        # these keys when present — a missing key raises KeyError (TODO confirm)
        metadata = request_metadata.args.get('request_metadata', {})
        recipe = metadata['recipe']
        data_analyst_email = metadata['dataAnalystEmail']
        data_analyst_name = metadata['dataAnalystName']
        investigator_email = metadata['investigatorEmail']
        investigator_name = metadata['investigatorName']
        lab_head_email = metadata['labHeadEmail']
        lab_head_name = metadata['labHeadName']
        pi_email = metadata['piEmail']
        project_manager_name = metadata['projectManagerName']
        qc_access_emails = metadata['qcAccessEmails']
    # Positional argument order is part of the ETLImportEvent contract
    event = ETLImportEvent(str(self.job.job_group_notifier.id),
                           str(self.job.job_group.id),
                           self.job.args['request_id'],
                           list(samples_completed),
                           list(samples_failed),
                           recipe,
                           data_analyst_email,
                           data_analyst_name,
                           investigator_email,
                           investigator_name,
                           lab_head_email,
                           lab_head_name,
                           pi_email,
                           project_manager_name,
                           qc_access_emails,
                           number_of_tumors,
                           number_of_normals,
                           len(pooled_normal_jobs)
                           )
    e = event.to_dict()
    send_notification.delay(e)
    etl_event = ETLJobsLinksEvent(str(self.job.job_group_notifier.id),
                                  self.job.args['request_id'],
                                  all_jobs)
    etl_e = etl_event.to_dict()
    send_notification.delay(etl_e)
def _generate_ticket_decription(self):
    """Summarize this job group's import and notify the ticketing system.

    Classifies every Job in the group by run type, derives completed/failed
    sample sets and tumor/normal counts, then emits an ETLImportEvent
    followed by an ETLJobsLinksEvent through send_notification.
    """
    completed_samples = set()
    failed_samples = set()
    request_rows = []
    sample_rows = []
    pooled_normal_rows = []
    for group_job in Job.objects.filter(job_group=self.job.job_group.id).all():
        if group_job.run == TYPES["SAMPLE"]:
            # record terminal sample outcomes
            if group_job.status == JobStatus.COMPLETED:
                completed_samples.add(group_job.args["sample_id"])
            elif group_job.status == JobStatus.FAILED:
                failed_samples.add(group_job.args["sample_id"])
            sample_rows.append((
                str(group_job.id),
                JobStatus(group_job.status).name,
                self.get_key(group_job.run),
                group_job.message or "",
                group_job.args.get("sample_id", ""),
            ))
        elif group_job.run == TYPES["REQUEST"]:
            # request rows carry no status and no sample id
            request_rows.append(
                (str(group_job.id), "", self.get_key(group_job.run),
                 group_job.message or "", ""))
        elif group_job.run == TYPES["POOLED_NORMAL"]:
            pooled_normal_rows.append((
                str(group_job.id),
                JobStatus(group_job.status).name,
                self.get_key(group_job.run),
                group_job.message or "",
                group_job.args.get("sample_id", ""),
            ))
    # ticket ordering: request first, then samples, then pooled normals
    all_rows = request_rows + sample_rows + pooled_normal_rows
    request_id = self.job.args["request_id"]
    # most recent SAMPLE job holds the freshest request metadata
    newest_sample_job = (Job.objects.filter(
        args__request_id=request_id,
        run=TYPES["SAMPLE"]).order_by("-created_date").first())
    tumor_count = FileRepository.filter(
        metadata={
            "requestId": request_id,
            "tumorOrNormal": "Tumor"
        }, values_metadata="sampleId").count()
    normal_count = FileRepository.filter(
        metadata={
            "requestId": request_id,
            "tumorOrNormal": "Normal"
        }, values_metadata="sampleId").count()
    # defaults apply when no SAMPLE job (hence no metadata) exists
    recipe = ""
    data_analyst_email = ""
    data_analyst_name = ""
    investigator_email = ""
    investigator_name = ""
    lab_head_email = ""
    lab_head_name = ""
    pi_email = ""
    project_manager_name = ""
    qc_access_emails = ""
    data_access_emails = ""
    other_contact_emails = ""
    if newest_sample_job:
        req_meta = newest_sample_job.args.get("request_metadata", {})
        recipe = req_meta["recipe"]
        data_analyst_email = req_meta["dataAnalystEmail"]
        data_analyst_name = req_meta["dataAnalystName"]
        investigator_email = req_meta["investigatorEmail"]
        investigator_name = req_meta["investigatorName"]
        lab_head_email = req_meta["labHeadEmail"]
        lab_head_name = req_meta["labHeadName"]
        pi_email = req_meta["piEmail"]
        project_manager_name = req_meta["projectManagerName"]
        qc_access_emails = req_meta["qcAccessEmails"]
        data_access_emails = req_meta["dataAccessEmails"]
        other_contact_emails = req_meta["otherContactEmails"]
    # positional argument order is part of the ETLImportEvent contract
    summary_event = ETLImportEvent(
        str(self.job.job_group_notifier.id),
        str(self.job.job_group.id),
        request_id,
        list(completed_samples),
        list(failed_samples),
        recipe,
        data_analyst_email,
        data_analyst_name,
        investigator_email,
        investigator_name,
        lab_head_email,
        lab_head_name,
        pi_email,
        project_manager_name,
        qc_access_emails,
        tumor_count,
        normal_count,
        len(pooled_normal_rows),
        data_access_emails,
        other_contact_emails,
    )
    send_notification.delay(summary_event.to_dict())
    links_event = ETLJobsLinksEvent(str(self.job.job_group_notifier.id),
                                    request_id, all_rows)
    send_notification.delay(links_event.to_dict())