def _check_children(self):
    """Roll child-job statuses up onto the parent job.

    The parent becomes FAILED if any child failed or no longer exists,
    and COMPLETED once every child reached a terminal state. On success,
    a callback Job is scheduled if one is configured.
    """
    finished = True
    failed = []
    for child_id in self.job.children:
        try:
            child_job = Job.objects.get(id=child_id)
        except Job.DoesNotExist:
            # A vanished child record counts as a failure.
            failed.append(child_id)
            continue
        if child_job.status == JobStatus.FAILED:
            failed.append(child_id)
        if child_job.status in (JobStatus.IN_PROGRESS, JobStatus.CREATED, JobStatus.WAITING_FOR_CHILDREN):
            # At least one child is still running; check again later.
            finished = False
            break
    if finished:
        if failed:
            self.job.status = JobStatus.FAILED
            # Coerce ids to str before joining: children ids may be UUIDs
            # and str.join raises TypeError on non-string items.
            self.job.message = {
                "details": "Child jobs %s failed" % ', '.join(str(f) for f in failed)
            }
            self._job_failed()
        else:
            self.job.status = JobStatus.COMPLETED
            self._job_successful()
            if self.job.callback:
                job = Job(run=self.job.callback,
                          args=self.job.callback_args,
                          status=JobStatus.CREATED,
                          max_retry=1,
                          children=[],
                          job_group=self.job.job_group)
                job.save()
def create_sample_job(sample_id, igocomplete, request_id, request_metadata, redelivery=False, job_group=None, job_group_notifier=None):
    """Create and persist a CREATED-state sample ETL Job.

    Args:
        sample_id: LIMS sample identifier.
        igocomplete: IGO-complete flag forwarded to the job args.
        request_id: LIMS request the sample belongs to.
        request_metadata: request-level metadata dict forwarded to the job.
        redelivery: whether this sample is being redelivered.
        job_group: optional JobGroup to attach the job to.
        job_group_notifier: optional JobGroupNotifier; its id is stored in args.

    Returns:
        The saved Job instance.
    """
    job = Job(
        run=TYPES["SAMPLE"],
        args={
            "sample_id": sample_id,
            "igocomplete": igocomplete,
            "request_id": request_id,
            "request_metadata": request_metadata,
            "redelivery": redelivery,
            # Guard the documented default: the original dereferenced
            # job_group_notifier.id unconditionally, raising AttributeError
            # whenever the default None was used.
            "job_group_notifier": str(job_group_notifier.id) if job_group_notifier else None,
        },
        status=JobStatus.CREATED,
        max_retry=1,
        children=[],
        job_group=job_group,
        job_group_notifier=job_group_notifier,
    )
    job.save()
    return job
def fetch_requests_lims():
    """Queue a fetch_new_requests_lims ETL job unless one is already active.

    The queued job's timestamp (milliseconds) is the creation time of the
    most recent DELIVERY job, or a 120-hour look-back when none exists.
    """
    logger.info("ETL fetching requestIDs")
    running = Job.objects.filter(
        run=TYPES["DELIVERY"],
        status__in=(JobStatus.CREATED, JobStatus.IN_PROGRESS, JobStatus.WAITING_FOR_CHILDREN),
    )
    # .exists() issues a cheap EXISTS query instead of materializing the
    # whole queryset just to take len() of it.
    if running.exists():
        logger.info(
            format_log("ETL job already in progress", obj=running.first()))
        return
    latest = Job.objects.filter(
        run=TYPES["DELIVERY"]).order_by("-created_date").first()
    if latest:
        timestamp = int(latest.created_date.timestamp()) * 1000
    else:
        # First run: look back 120 hours.
        timestamp = int((datetime.datetime.now() - datetime.timedelta(hours=120)).timestamp()) * 1000
    job = Job(
        run="beagle_etl.jobs.lims_etl_jobs.fetch_new_requests_lims",
        args={"timestamp": timestamp},
        status=JobStatus.CREATED,
        max_retry=3,
        children=[],
    )
    job.save()
    logger.info(format_log("ETL fetch_new_requests_lims job created", obj=job))
def create_request_job(request_id, redelivery=False):
    """Create the ETL Job (plus its callback) for a LIMS request.

    Skips creation when an active job for the request already exists, or
    when the request was already delivered and redelivery is deactivated.

    Returns:
        (Job, str): the created job and a status message, or
        (None, str) when no job is created.
    """
    logger.info(
        "Searching for job: %s for request_id: %s" % (TYPES['REQUEST'], request_id))
    # Active (non-terminal) jobs for this request.
    count = Job.objects.filter(run=TYPES['REQUEST'], args__request_id=request_id,
                               status__in=[JobStatus.CREATED, JobStatus.IN_PROGRESS,
                                           JobStatus.WAITING_FOR_CHILDREN]).count()
    # Any prior job (terminal or not) means the request was seen before.
    request_redelivered = Job.objects.filter(run=TYPES['REQUEST'], args__request_id=request_id).count() > 0
    assays = ETLConfiguration.objects.first()
    if request_redelivered and not (assays.redelivery and redelivery):
        return None, "Request is redelivered, but redelivery deactivated"
    if count == 0:
        job_group = JobGroup()
        job_group.save()
        job_group_notifier_id = notifier_start(job_group, request_id)
        job_group_notifier = JobGroupNotifier.objects.get(id=job_group_notifier_id)
        job = Job(run=TYPES['REQUEST'],
                  args={'request_id': request_id, 'job_group': str(job_group.id),
                        'job_group_notifier': job_group_notifier_id, 'redelivery': request_redelivered},
                  status=JobStatus.CREATED,
                  max_retry=1,
                  children=[],
                  callback=TYPES['REQUEST_CALLBACK'],
                  callback_args={'request_id': request_id, 'job_group': str(job_group.id),
                                 'job_group_notifier': job_group_notifier_id},
                  job_group=job_group,
                  job_group_notifier=job_group_notifier)
        job.save()
        if request_redelivered:
            redelivery_event = RedeliveryEvent(job_group_notifier_id).to_dict()
            send_notification.delay(redelivery_event)
        return job, "Job Created"
    # Previously this path fell off the end and returned bare None, which
    # broke callers that unpack the (job, message) pair.
    return None, "Job already exists"
class JobViewTest(APITestCase):
    """Filter tests for the /v0/etl/jobs list endpoint."""

    def setUp(self):
        """Authenticate as a superuser and seed four job groups and five jobs."""
        superuser = User.objects.create_superuser('admin', 'sample_email', 'password')
        self.client.force_authenticate(user=superuser)
        for num in range(1, 5):
            group = JobGroup(jira_id='jira_id%d' % num)
            group.save()
            setattr(self, 'job_group%d' % num, group)
        # (key2, boolean_key, sample_id, request_id, status, group, run)
        rows = [
            ('value2', True, 'sample_id1', 'request_id1', JobStatus.COMPLETED, self.job_group1, TYPES['SAMPLE']),
            ('1value2', False, 'sample_id2', 'request_id1', JobStatus.FAILED, self.job_group2, TYPES['POOLED_NORMAL']),
            ('1value2', False, 'sample_id3', 'request_id1', JobStatus.FAILED, self.job_group1, TYPES['POOLED_NORMAL']),
            ('1value4', False, 'sample_id4', 'request_id1', JobStatus.FAILED, self.job_group3, TYPES['POOLED_NORMAL']),
            ('1value2', False, 'sample_id5', 'request_id2', JobStatus.FAILED, self.job_group4, TYPES['POOLED_NORMAL']),
        ]
        for num, (key2, flag, sample_id, request_id, status, group, run_type) in enumerate(rows, start=1):
            job = Job(args={'key1': 'value1', 'key2': key2, 'boolean_key': flag,
                            'sample_id': sample_id, 'request_id': request_id},
                      status=status, job_group=group, run=run_type)
            job.save()
            setattr(self, 'job%d' % num, job)
        self.api_root = '/v0/etl/jobs'

    def test_query_job_group(self):
        reply = self.client.get('%s/?job_group=%s' % (self.api_root, self.job_group1.id))
        self.assertEqual(len(reply.json()['results']), 2)

    def test_query_job_type(self):
        reply = self.client.get('%s/?type=POOLED_NORMAL' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 4)

    def test_query_sampleid(self):
        reply = self.client.get('%s/?sample_id=sample_id1' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 1)

    def test_query_requestid(self):
        reply = self.client.get('%s/?request_id=request_id1' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 4)
        reply = self.client.get('%s/?request_id=request_id1&sample_id=sample_id1' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 1)

    def test_query_value_args(self):
        reply = self.client.get('%s/?values_args=key1,key2' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 3)

    def test_query_args(self):
        reply = self.client.get('%s/?args=key2:1value4' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 1)
        reply = self.client.get('%s/?args=boolean_key:False' % self.api_root)
        self.assertEqual(len(reply.json()['results']), 4)

    def test_query_args_distribution(self):
        reply = self.client.get('%s/?args_distribution=key2' % self.api_root)
        self.assertEqual(reply.json(), {'1value2': 3, 'value2': 1, '1value4': 1})
def create_sample_job(sample_id, igocomplete, request_id, request_metadata, redelivery=False, job_group=None, job_group_notifier=None):
    """Create and persist a CREATED-state sample ETL Job.

    Args:
        sample_id: LIMS sample identifier.
        igocomplete: IGO-complete flag forwarded to the job args.
        request_id: LIMS request the sample belongs to.
        request_metadata: request-level metadata dict forwarded to the job.
        redelivery: whether this sample is being redelivered.
        job_group: optional JobGroup to attach the job to.
        job_group_notifier: optional JobGroupNotifier; its id is stored in args.

    Returns:
        The saved Job instance.
    """
    # Guard the documented default: the original dereferenced
    # job_group_notifier.id unconditionally, raising AttributeError
    # whenever the default None was used.
    notifier_id = str(job_group_notifier.id) if job_group_notifier else None
    job = Job(run=TYPES['SAMPLE'],
              args={'sample_id': sample_id,
                    'igocomplete': igocomplete,
                    'request_id': request_id,
                    'request_metadata': request_metadata,
                    'redelivery': redelivery,
                    'job_group_notifier': notifier_id},
              status=JobStatus.CREATED,
              max_retry=1,
              children=[],
              job_group=job_group,
              job_group_notifier=job_group_notifier)
    job.save()
    return job
def check_missing_requests():
    """Queue a LIMS fetch covering the last 12 hours.

    Some requests appear in LIMS with a back-dated timestamp, so this
    periodic look-back sweep catches anything the incremental fetch missed.
    """
    logger.info("Check for missing requests")
    lookback = datetime.datetime.now() - datetime.timedelta(hours=12)
    # LIMS expects milliseconds since the epoch.
    timestamp = int(lookback.timestamp()) * 1000
    sweep_job = Job(
        run='beagle_etl.jobs.lims_etl_jobs.fetch_new_requests_lims',
        args={'timestamp': timestamp, 'redelivery': False},
        status=JobStatus.CREATED,
        max_retry=3,
        children=[],
    )
    sweep_job.save()
    logger.info("Fetching fetch_new_requests_lims job created")
def get_or_create_pooled_normal_job(filepath, job_group=None, job_group_notifier=None):
    """Return the pooled-normal Job for *filepath*, creating one if absent."""
    logger.info(
        "Searching for job: %s for filepath: %s" % (TYPES['POOLED_NORMAL'], filepath))
    existing = Job.objects.filter(run=TYPES['POOLED_NORMAL'], args__filepath=filepath).first()
    if existing:
        # Reuse the job already queued/run for this filepath.
        return existing
    new_job = Job(run=TYPES['POOLED_NORMAL'],
                  args={'filepath': filepath,
                        'file_group_id': str(settings.POOLED_NORMAL_FILE_GROUP)},
                  status=JobStatus.CREATED,
                  max_retry=1,
                  children=[],
                  job_group=job_group,
                  job_group_notifier=job_group_notifier)
    new_job.save()
    return new_job
def _check_children(self):
    """Roll child-job statuses up onto the parent job.

    The parent becomes FAILED if any child failed or no longer exists,
    COMPLETED once every child reached a terminal state. Permission-denied
    child failures (message code 108) are flagged so the failure handler
    can report the affected recipe. On success, a callback Job is
    scheduled if one is configured.
    """
    finished = True
    failed = []
    permission_denied = False
    recipe = None
    for child_id in self.job.children:
        try:
            child_job = Job.objects.get(id=child_id)
        except Job.DoesNotExist:
            # A vanished child record counts as a failure.
            failed.append(child_id)
            continue
        if child_job.status == JobStatus.FAILED:
            failed.append(child_id)
            # Error code 108 marks a permission-denied failure; remember the
            # recipe so _job_failed can report it.
            if isinstance(child_job.message, dict) and child_job.message.get("code", 0) == 108:
                logger.error(
                    format_log(
                        "ETL job failed because of permission denied error",
                        obj=self.job))
                recipe = child_job.args.get("request_metadata", {}).get("recipe")
                permission_denied = True
        if child_job.status in (JobStatus.IN_PROGRESS, JobStatus.CREATED, JobStatus.WAITING_FOR_CHILDREN):
            # At least one child is still running; check again later.
            finished = False
            break
    if finished:
        if failed:
            self.job.status = JobStatus.FAILED
            # Coerce ids to str before joining: children ids may be UUIDs
            # and str.join raises TypeError on non-string items.
            self.job.message = {
                "details": "Child jobs %s failed" % ", ".join(str(f) for f in failed)
            }
            self._job_failed(permission_denied, recipe)
        else:
            self.job.status = JobStatus.COMPLETED
            self._job_successful()
            if self.job.callback:
                job = Job(
                    run=self.job.callback,
                    args=self.job.callback_args,
                    status=JobStatus.CREATED,
                    max_retry=1,
                    children=[],
                    job_group=self.job.job_group,
                )
                job.save()
def fetch_requests_lims():
    """Queue a fetch_new_requests_lims ETL job unless one is already active.

    The queued job's timestamp (milliseconds) is the creation time of the
    most recent DELIVERY job, or a 120-hour look-back when none exists.
    """
    logger.info("Fetching requestIDs")
    running = Job.objects.filter(run=TYPES['DELIVERY'],
                                 status__in=(JobStatus.CREATED, JobStatus.IN_PROGRESS,
                                             JobStatus.WAITING_FOR_CHILDREN))
    # .exists() issues a cheap EXISTS query instead of materializing the
    # whole queryset just to take len() of it.
    if running.exists():
        logger.info("Job already in progress %s" % running.first())
        return
    latest = Job.objects.filter(run=TYPES['DELIVERY']).order_by('-created_date').first()
    if latest:
        timestamp = int(latest.created_date.timestamp()) * 1000
    else:
        # First run: look back 120 hours.
        timestamp = int((datetime.datetime.now() - datetime.timedelta(hours=120)).timestamp()) * 1000
    job = Job(run='beagle_etl.jobs.lims_etl_jobs.fetch_new_requests_lims',
              args={'timestamp': timestamp},
              status=JobStatus.CREATED,
              max_retry=3,
              children=[])
    job.save()
    logger.info("Fetching fetch_new_requests_lims job created")
def setUp(self):
    """Authenticate as a superuser and seed four job groups and five jobs."""
    superuser = User.objects.create_superuser('admin', 'sample_email', 'password')
    self.client.force_authenticate(user=superuser)
    for num in range(1, 5):
        group = JobGroup(jira_id='jira_id%d' % num)
        group.save()
        setattr(self, 'job_group%d' % num, group)
    # (key2, boolean_key, sample_id, request_id, status, group, run)
    rows = [
        ('value2', True, 'sample_id1', 'request_id1', JobStatus.COMPLETED, self.job_group1, TYPES['SAMPLE']),
        ('1value2', False, 'sample_id2', 'request_id1', JobStatus.FAILED, self.job_group2, TYPES['POOLED_NORMAL']),
        ('1value2', False, 'sample_id3', 'request_id1', JobStatus.FAILED, self.job_group1, TYPES['POOLED_NORMAL']),
        ('1value4', False, 'sample_id4', 'request_id1', JobStatus.FAILED, self.job_group3, TYPES['POOLED_NORMAL']),
        ('1value2', False, 'sample_id5', 'request_id2', JobStatus.FAILED, self.job_group4, TYPES['POOLED_NORMAL']),
    ]
    for num, (key2, flag, sample_id, request_id, status, group, run_type) in enumerate(rows, start=1):
        job = Job(args={'key1': 'value1', 'key2': key2, 'boolean_key': flag,
                        'sample_id': sample_id, 'request_id': request_id},
                  status=status, job_group=group, run=run_type)
        job.save()
        setattr(self, 'job%d' % num, job)
    self.api_root = '/v0/etl/jobs'
def create_request_job(request_id, redelivery=False):
    """Create the ETL Job chain (request job + callback) for a LIMS request.

    Records the request and its LIMS delivery date, then — unless the
    request is a deactivated redelivery — creates a REQUEST job wired to
    a REQUEST_CALLBACK and emits the redelivery / delivery-date
    notifications.

    NOTE(review): when an active job already exists (count != 0) the
    function falls off the end and implicitly returns None instead of the
    (job, message) tuple returned on other paths — confirm callers
    tolerate this.
    """
    logger.info("Searching for job: %s for request_id: %s" % (TYPES["REQUEST"], request_id))
    # Active jobs for this request in any non-terminal state.
    count = Job.objects.filter(
        run=TYPES["REQUEST"],
        args__request_id=request_id,
        status__in=[
            JobStatus.CREATED, JobStatus.IN_PROGRESS, JobStatus.WAITING_FOR_CHILDREN
        ],
    ).count()
    # Any prior job (terminal or not) means the request was seen before.
    request_redelivered = Job.objects.filter(
        run=TYPES["REQUEST"], args__request_id=request_id).count() > 0
    delivery_date = None
    try:
        request_from_lims = LIMSClient.get_request_samples(request_id)
        # LIMS reports deliveryDate in milliseconds since the epoch.
        # NOTE(review): this assumes `from datetime import datetime`; other
        # versions of this function in the codebase use the module form
        # `datetime.datetime` — confirm this file's import, otherwise this
        # line raises AttributeError.
        delivery_date = datetime.fromtimestamp(
            request_from_lims["deliveryDate"] / 1000)
    except Exception:
        # Best-effort: a missing delivery date must not block job creation.
        logger.error("Failed to retrieve deliveryDate for request %s" % request_id)
    if not Request.objects.filter(request_id=request_id):
        Request.objects.create(request_id=request_id, delivery_date=delivery_date)
    assays = ETLConfiguration.objects.first()
    if request_redelivered and not (assays.redelivery and redelivery):
        return None, "Request is redelivered, but redelivery deactivated"
    if count == 0:
        job_group = JobGroup()
        job_group.save()
        job_group_notifier_id = notifier_start(job_group, request_id)
        job_group_notifier = JobGroupNotifier.objects.get(
            id=job_group_notifier_id)
        job = Job(
            run=TYPES["REQUEST"],
            args={
                "request_id": request_id,
                "job_group": str(job_group.id),
                "job_group_notifier": job_group_notifier_id,
                "redelivery": request_redelivered,
            },
            status=JobStatus.CREATED,
            max_retry=1,
            children=[],
            callback=TYPES["REQUEST_CALLBACK"],
            callback_args={
                "request_id": request_id,
                "job_group": str(job_group.id),
                "job_group_notifier": job_group_notifier_id,
            },
            job_group=job_group,
            job_group_notifier=job_group_notifier,
        )
        job.save()
        if request_redelivered:
            redelivery_event = RedeliveryEvent(job_group_notifier_id).to_dict()
            send_notification.delay(redelivery_event)
        # Re-read the Request row so the notification carries whatever
        # delivery date is actually persisted.
        request_obj = Request.objects.filter(request_id=request_id).first()
        if request_obj:
            delivery_date_event = SetDeliveryDateFieldEvent(
                job_group_notifier_id, str(request_obj.delivery_date)).to_dict()
            send_notification.delay(delivery_date_event)
        return job, "Job Created"
def setUp(self):
    """Authenticate as a superuser and seed four job groups and five jobs."""
    admin = User.objects.create_superuser("admin", "sample_email", "password")
    self.client.force_authenticate(user=admin)
    for idx in range(1, 5):
        grp = JobGroup()
        grp.save()
        setattr(self, "job_group%d" % idx, grp)
    # (key2, boolean_key, sample_id, request_id, status, group, run)
    seed = [
        ("value2", True, "sample_id1", "request_id1", JobStatus.COMPLETED, self.job_group1, TYPES["SAMPLE"]),
        ("1value2", False, "sample_id2", "request_id1", JobStatus.FAILED, self.job_group2, TYPES["POOLED_NORMAL"]),
        ("1value2", False, "sample_id3", "request_id1", JobStatus.FAILED, self.job_group1, TYPES["POOLED_NORMAL"]),
        ("1value4", False, "sample_id4", "request_id1", JobStatus.FAILED, self.job_group3, TYPES["POOLED_NORMAL"]),
        ("1value2", False, "sample_id5", "request_id2", JobStatus.FAILED, self.job_group4, TYPES["POOLED_NORMAL"]),
    ]
    for idx, (key2, flag, sid, rid, status, grp, run_type) in enumerate(seed, start=1):
        job = Job(
            args={"key1": "value1", "key2": key2, "boolean_key": flag,
                  "sample_id": sid, "request_id": rid},
            status=status,
            job_group=grp,
            run=run_type,
        )
        job.save()
        setattr(self, "job%d" % idx, job)
    self.api_root = "/v0/etl/jobs"
class JobViewTest(APITestCase):
    """Filter tests for the /v0/etl/jobs list endpoint."""

    def setUp(self):
        """Authenticate as a superuser and seed four job groups and five jobs."""
        admin = User.objects.create_superuser("admin", "sample_email", "password")
        self.client.force_authenticate(user=admin)
        for idx in range(1, 5):
            grp = JobGroup()
            grp.save()
            setattr(self, "job_group%d" % idx, grp)
        # (key2, boolean_key, sample_id, request_id, status, group, run)
        seed = [
            ("value2", True, "sample_id1", "request_id1", JobStatus.COMPLETED, self.job_group1, TYPES["SAMPLE"]),
            ("1value2", False, "sample_id2", "request_id1", JobStatus.FAILED, self.job_group2, TYPES["POOLED_NORMAL"]),
            ("1value2", False, "sample_id3", "request_id1", JobStatus.FAILED, self.job_group1, TYPES["POOLED_NORMAL"]),
            ("1value4", False, "sample_id4", "request_id1", JobStatus.FAILED, self.job_group3, TYPES["POOLED_NORMAL"]),
            ("1value2", False, "sample_id5", "request_id2", JobStatus.FAILED, self.job_group4, TYPES["POOLED_NORMAL"]),
        ]
        for idx, (key2, flag, sid, rid, status, grp, run_type) in enumerate(seed, start=1):
            job = Job(
                args={"key1": "value1", "key2": key2, "boolean_key": flag,
                      "sample_id": sid, "request_id": rid},
                status=status,
                job_group=grp,
                run=run_type,
            )
            job.save()
            setattr(self, "job%d" % idx, job)
        self.api_root = "/v0/etl/jobs"

    def _results(self, query):
        """GET the jobs endpoint with *query* appended and return the results list."""
        return self.client.get(self.api_root + query).json()["results"]

    def test_query_job_group(self):
        self.assertEqual(len(self._results("/?job_group=" + str(self.job_group1.id))), 2)

    def test_query_job_type(self):
        self.assertEqual(len(self._results("/?type=POOLED_NORMAL")), 4)

    def test_query_sampleid(self):
        self.assertEqual(len(self._results("/?sample_id=sample_id1")), 1)

    def test_query_requestid(self):
        self.assertEqual(len(self._results("/?request_id=request_id1")), 4)
        self.assertEqual(len(self._results("/?request_id=request_id1&sample_id=sample_id1")), 1)

    def test_query_value_args(self):
        self.assertEqual(len(self._results("/?values_args=key1,key2")), 3)

    def test_query_args(self):
        self.assertEqual(len(self._results("/?args=key2:1value4")), 1)
        self.assertEqual(len(self._results("/?args=boolean_key:False")), 4)

    def test_query_args_distribution(self):
        response = self.client.get(self.api_root + "/?args_distribution=key2")
        self.assertEqual(response.json(), {"1value2": 3, "value2": 1, "1value4": 1})