def workflow_is_duplicate(self, workflow):
    """Report whether ``workflow`` duplicates another workflow with the same hash-id.

    Queries Cromwell for every workflow labelled with this workflow's
    ``hash-id`` (requesting labels in the result) and inspects the results
    whose ``id`` differs from ``workflow.id``.

    Args:
        workflow: Object exposing ``id``, ``labels`` (supports ``.get``) and
            ``bundle_version``.

    Returns:
        bool:
            * ``False`` when no other workflow shares the hash-id.
            * ``True`` when others exist but none of them is ``'On Hold'``.
            * Otherwise ``True`` only if this workflow's bundle datetime is
              strictly earlier than the latest bundle datetime among the
              on-hold workflows.

    Note:
        The query is issued with ``raise_for_status=True``; presumably a
        non-success response raises from ``CromwellAPI.query`` -- confirm
        against the cromwell-tools version in use.
    """
    hash_id = workflow.labels.get('hash-id')
    query_dict = {
        'label': f'hash-id:{hash_id}',
        'additionalQueryResultFields': 'labels',
    }
    response = CromwellAPI.query(
        query_dict, self.cromwell_auth, raise_for_status=True
    )

    # The query also returns the workflow being checked; leave it out.
    others = [
        result
        for result in response.json()['results']
        if result['id'] != workflow.id
    ]
    if not others:
        return False

    # If there are other on-hold workflows with the same hash-id, the
    # workflow is a duplicate/should not be run if it has an older bundle
    # version than the others.
    on_hold_versions = [
        self.get_bundle_datetime(other['labels']['bundle-version'])
        for other in others
        if other['status'] == 'On Hold'
    ]
    if not on_hold_versions:
        # Others exist but none is waiting on hold: treated as a duplicate.
        return True

    # Only the newest on-hold version matters, so max() replaces a full sort.
    newest_on_hold = max(on_hold_versions)
    return self.get_bundle_datetime(workflow.bundle_version) < newest_on_hold
def retrieve_workflows(self, query_dict):
    """
    Retrieve the latest list of workflow metadata matching ``query_dict`` from Cromwell.

    Args:
        query_dict (dict): A dictionary that contains valid query parameters which can be accepted by the Cromwell
        /query endpoint. NOTE: it is modified in place -- the key
        "additionalQueryResultFields" is set to "labels" before the query is sent.

    Returns:
        workflow_metas (None or list): Will be None if it gets a non 200 code from Cromwell, if the request
            itself fails, or if the response body cannot be parsed / has no "results" entry; otherwise will be
            a list of workflow metadata dict blocks. e.g.
        ```
            [
                {
                    "name": "WorkflowName1",
                    "id": "xxx1",
                    "submission": "2018-01-01T23:49:40.620Z",
                    "status": "Succeeded",
                    "end": "2018-07-12T00:37:12.282Z",
                    "start": "2018-07-11T23:49:48.384Z"
                },
                {
                    "name": "WorkflowName2",
                    "id": "xxx2",
                    "submission": "2018-01-01T23:49:42.171Z",
                    "status": "Succeeded",
                    "end": "2018-07-12T00:31:27.273Z",
                    "start": "2018-07-11T23:49:48.385Z"
                }
            ]
        ```
    """
    workflow_metas = None
    query_dict["additionalQueryResultFields"] = "labels"
    try:
        response = CromwellAPI.query(auth=self.cromwell_auth,
                                     query_dict=query_dict)
        if response.status_code != 200:
            logger.warning(
                "QueueHandler | Failed to retrieve workflows from Cromwell | {0} | {1}"
                .format(response.text, datetime.now()))
        else:
            workflow_metas = response.json()["results"]
            num_workflows = len(workflow_metas)
            logger.info(
                "QueueHandler | Retrieved {0} workflows from Cromwell. | {1}"
                .format(num_workflows, datetime.now()))
            logger.debug("QueueHandler | {0} | {1}".format(
                workflow_metas, datetime.now()))  # TODO: remove this or not?
    except (
            # RequestException already covers ConnectionError (its subclass).
            requests.exceptions.RequestException,
            # Malformed payload: body is not JSON, lacks "results", or
            # "results" has no length.
            ValueError,
            KeyError,
            TypeError,
    ) as error:
        logger.error(
            "QueueHandler | Failed to retrieve workflows from Cromwell | {0} | {1}"
            .format(error, datetime.now()))
    # Returning here rather than from a `finally` clause keeps the
    # best-effort "None on failure" contract without silently discarding
    # unrelated exceptions such as KeyboardInterrupt or SystemExit.
    return workflow_metas
def query_workflows(cromwell_auth, query):
    """Return the ids of workflows matching the conditions in ``query``.

    Results that carry a ``parentWorkflowId`` key are left out of the
    returned list.

    Args:
        cromwell_auth: Authentication object passed through to
            ``CromwellAPI.query``.
        query (dict): Query conditions, e.g.
            ``{"label": [{"run_id": "12"}, {"custom_label2": "barf"}]}`` or
            ``{"submission": "2020-01-10T14:53:48.128Z"}``.

    Returns:
        list: The ``"id"`` of every matching result without a
        ``parentWorkflowId``.

    Raises:
        requests.exceptions.HTTPError: Re-raised, after logging the query and
            the server's message, when the response status indicates failure.
    """
    result = CromwellAPI.query(query, cromwell_auth)
    try:
        result.raise_for_status()
    except requests.exceptions.HTTPError:
        logging.error("Unable to run query: {0}".format(query))
        # An error body is not guaranteed to be JSON (e.g. an HTML page from
        # a proxy). Fall back to the raw text so that a decode failure here
        # cannot mask the HTTPError being reported.
        try:
            server_message = result.json()
        except ValueError:
            server_message = result.text
        logging.error("Message from cromwell server:\n{0}".format(
            server_message))
        raise
    return [
        wf["id"] for wf in result.json()['results']
        if "parentWorkflowId" not in wf
    ]
def test_query_workflows_returns_200(self, mock_request):
    """A mocked 200 query response is returned intact for every auth option."""
    query_dict = {
        'status': ['Running', 'Failed'],
        'label': {
            'label_key1': 'label_value1',
            'label_key2': 'label_value2'
        },
    }

    # Canned body the mocked Cromwell query endpoint answers with.
    failed_workflow = {
        'name': 'workflow1',
        'submission': 'submission1',
        'id': 'id1',
        'status': 'Failed',
        'start': 'start1',
        'end': 'end1',
    }
    running_workflow = {
        'name': 'workflow2',
        'submission': 'submission2',
        'id': 'id2',
        'status': 'Running',
        'start': 'start2',
        'end': 'end2',
    }
    canned_body = {
        'results': [failed_workflow, running_workflow],
        'totalResultsCount': 2,
    }

    def _respond_ok(request, context):
        # Mark the mocked response as successful and attach a test header.
        context.status_code = 200
        context.headers['test'] = 'header'
        return canned_body

    for cromwell_auth in self.auth_options:
        query_url = '{}/api/workflows/v1/query'.format(cromwell_auth.url)
        mock_request.post(query_url, json=_respond_ok)

        result = CromwellAPI.query(query_dict, cromwell_auth)

        self.assertEqual(result.status_code, 200)
        self.assertEqual(result.json()['totalResultsCount'], 2)