def _setup_drydock_client(self):
    """Set up the Drydock client for use by this operator.

    Looks up the Drydock service endpoint, builds a ``DrydockSession``
    from the endpoint's host and port, and stores a ``DrydockClient``
    wrapping that session on ``self.drydock_client``.

    Raises DrydockClientUseFailureException if the endpoint has no
    hostname, or if the session or the client evaluates as falsy after
    construction.
    """
    # Retrieve Endpoint Information
    self.drydock_svc_endpoint = self.endpoints.endpoint_by_name(
        service_endpoint.DRYDOCK)

    LOG.info("Drydock endpoint is %s", self.drydock_svc_endpoint)

    # Parse DryDock Service Endpoint
    drydock_url = urlparse(self.drydock_svc_endpoint)

    # Assuming this is the standard-library urlparse: hostname is None
    # when the endpoint has no network location (e.g. the scheme is
    # missing). Fail here with a clear message instead of handing an
    # unusable host to the session.
    if not drydock_url.hostname:
        raise DrydockClientUseFailureException(
            "Unable to determine Drydock host from endpoint %s"
            % self.drydock_svc_endpoint)

    # Build a DrydockSession with credentials and target host
    # information.
    # The DrydockSession will care for TCP connection pooling
    # and header management
    dd_session = session.DrydockSession(drydock_url.hostname,
                                        port=drydock_url.port,
                                        auth_gen=self._auth_gen)

    # Raise Exception if we are not able to set up the session
    if not dd_session:
        raise DrydockClientUseFailureException(
            "Failed to set up Drydock Session!")

    # Use the DrydockSession to build a DrydockClient that can
    # be used to make one or more API calls
    self.drydock_client = client.DrydockClient(dd_session)

    # Raise Exception if we are not able to build the client
    if not self.drydock_client:
        raise DrydockClientUseFailureException(
            "Failed to set up Drydock Client!")

    LOG.info("Drydock Session and Client established.")
def fetch_failure_details(self):
    """Log details of the failed parent task and of its failed subtasks.

    Retrieves all task records from Drydock, stores them on
    ``self.all_task_ids`` keyed by 'task_id', logs the record whose id
    equals ``self.drydock_task_id`` and passes that record's
    'subtask_id_list' to ``check_subtask_failure``. If no record matches,
    only an informational message is logged.

    Raises DrydockClientUseFailureException if the client raises a
    ClientError while retrieving the task records.
    """
    LOG.info('Retrieving all tasks records from Drydock...')

    try:
        # Get all tasks records
        all_tasks = self.drydock_client.get_tasks()

        # Create a dictionary of tasks records with 'task_id' as key
        self.all_task_ids = {t['task_id']: t for t in all_tasks}

    except errors.ClientError as client_error:
        raise DrydockClientUseFailureException(client_error)

    # Retrieve the failed parent task (first record with a matching id)
    failed_parent_task = next(
        (t for t in all_tasks if t['task_id'] == self.drydock_task_id),
        None)

    if failed_parent_task is None:
        LOG.info("No failed parent task found for task_id %s",
                 self.drydock_task_id)
        return

    LOG.error("%s task has either failed or timed out",
              failed_parent_task['action'])

    # pprint.pprint() writes to stdout and returns None, which would put
    # the literal "None" in the log; pformat() returns the formatted text.
    LOG.error("%s", pprint.pformat(failed_parent_task))

    # Check the subtasks belonging to the failed parent task
    self.check_subtask_failure(failed_parent_task['subtask_id_list'])
def create_task(self, task_action):
    """Create a Drydock task and return its id.

    :param task_action: Value passed to Drydock as ``task_action``

    The task is created with ``self.design_ref`` and ``self.node_filter``.
    The id taken from the response is stored on ``self.drydock_task_id``
    and returned.

    Raises DrydockClientUseFailureException if the client raises a
    ClientError.
    Raises DrydockTaskNotCreatedException if the response does not
    carry a task id.
    """
    # Node Filter
    LOG.info("Nodes Filter List: %s", self.node_filter)

    try:
        # Create Task
        create_task_response = self.drydock_client.create_task(
            design_ref=self.design_ref,
            task_action=task_action,
            node_filter=self.node_filter)

    except errors.ClientError as client_error:
        raise DrydockClientUseFailureException(client_error)

    # Use .get() so that an empty response, or one without 'task_id',
    # reaches the DrydockTaskNotCreatedException below instead of
    # escaping as a bare KeyError.
    self.drydock_task_id = (create_task_response or {}).get('task_id')

    # Raise Exception if we are not able to get the task_id from
    # Drydock
    if not self.drydock_task_id:
        raise DrydockTaskNotCreatedException("Unable to create task!")

    LOG.info('Drydock %s task ID is %s',
             task_action, self.drydock_task_id)

    return self.drydock_task_id
def check_subtask_failure(self, subtask_id_list):
    """Log the failed steps of every subtask below a task.

    :param subtask_id_list: Ids of the first layer of subtasks to inspect

    The subtasks are visited layer by layer: each id is looked up in
    ``self.all_task_ids``, its own 'subtask_id_list' is queued for the
    next layer, and every entry of its message list flagged as an error
    is logged. The caller's list is not modified.

    Raises DrydockClientUseFailureException if an id has no record in
    ``self.all_task_ids``.
    """
    LOG.info("Printing information of failed sub-tasks...")

    current_layer = subtask_id_list

    while current_layer:
        # Ids collected for the next layer; stays empty after the
        # last layer, which ends the loop.
        next_layer = []

        # Print detailed information of failed step(s) under each
        # subtask. This will help to provide additional information
        # for troubleshooting purpose.
        for subtask_id in current_layer:

            LOG.info("Retrieving details of subtask %s...", subtask_id)

            # Retrieve task information
            task = self.all_task_ids.get(subtask_id)

            if not task:
                raise DrydockClientUseFailureException(
                    "Unable to retrieve subtask info!"
                )

            # Print subtask action and state
            LOG.info("%s subtask is in %s state",
                     task['action'],
                     task['result']['status'])

            # Queue this subtask's own subtasks for the next layer
            next_layer.extend(task['subtask_id_list'])

            details = task['result']['details']

            if details['errorCount'] > 0:
                # Print information of failed steps
                for message in details['messageList'] or []:
                    if message['error'] is True:
                        # pprint.pprint() writes to stdout and returns
                        # None; pformat() returns the text to log.
                        LOG.error("%s", pprint.pformat(message))
            else:
                LOG.info("No failed step detected for subtask %s",
                         subtask_id)

        current_layer = next_layer
def get_task_dict(self, task_id):
    """Fetch a single task from Drydock as the client returns it.

    :param task_id: The id of the task to retrieve

    Raises DrydockClientUseFailureException if the client raises a
    ClientError.

    See:
    https://airship-drydock.readthedocs.io/en/latest/task.html#task-status-schema
    """
    try:
        task_record = self.drydock_client.get_task(task_id=task_id)
    except errors.ClientError as err:
        # Re-raise client failures as the operator's own exception type
        raise DrydockClientUseFailureException(err)

    return task_record
def get_nodes(self):
    """Fetch the node records from Drydock as the client returns them.

    Raises DrydockClientUseFailureException if the client raises a
    ClientError; the failure is logged before it is re-raised.

    See:
    https://airship-drydock.readthedocs.io/en/latest/API.html
    """
    try:
        node_records = self.drydock_client.get_nodes()
    except errors.ClientError as err:
        LOG.error("Drydock client failed to get nodes from Drydock.")
        raise DrydockClientUseFailureException(err)

    return node_records
def task_failure(self, _task_failure):
    """Log the details of a failed task and terminate the workflow.

    :param _task_failure: Truthy when the task failed; falsy when it
                          timed out. Selects the exception raised.

    Calls ``self.get_k8s_logs()``, retrieves all task records from
    Drydock (stored on ``self.all_task_ids`` keyed by 'task_id'), logs
    the record whose id equals ``self.drydock_task_id`` and checks its
    subtasks through ``check_subtask_failure``. This method always ends
    by raising.

    Raises DrydockClientUseFailureException if the client raises a
    ClientError while retrieving the task records.
    Raises DrydockTaskFailedException if ``_task_failure`` is truthy.
    Raises DrydockTaskTimeoutException otherwise.
    """
    # Dump logs from Drydock pods
    self.get_k8s_logs()

    LOG.info('Retrieving all tasks records from Drydock...')

    try:
        # Get all tasks records
        all_tasks = self.drydock_client.get_tasks()

        # Create a dictionary of tasks records with 'task_id' as key
        self.all_task_ids = {t['task_id']: t for t in all_tasks}

    except errors.ClientError as client_error:
        raise DrydockClientUseFailureException(client_error)

    # Retrieve the failed parent task (first record with a matching id)
    failed_parent_task = next(
        (t for t in all_tasks if t['task_id'] == self.drydock_task_id),
        None)

    if failed_parent_task is not None:
        LOG.error("%s task has either failed or timed out",
                  failed_parent_task['action'])

        # pprint.pprint() writes to stdout and returns None, which would
        # put the literal "None" in the log; pformat() returns the text.
        LOG.error("%s", pprint.pformat(failed_parent_task))

        # Check the subtasks belonging to the failed parent task
        self.check_subtask_failure(failed_parent_task['subtask_id_list'])

    # Raise Exception to terminate workflow
    if _task_failure:
        raise DrydockTaskFailedException(
            "Failed to Execute/Complete Task!")

    raise DrydockTaskTimeoutException("Task Execution Timed Out!")
def run_base(self, context):
    """Prepare the operator for a Drydock action.

    :param context: Not read by this method; presumably the Airflow
                    task context -- TODO confirm against the caller.

    Steps:
      1. If ``self.xcom_puller.get_check_drydock_continue_on_fail()`` is
         truthy, set ``self.continue_processing`` to False and return.
      2. For the 'redeploy_server' dag, read the 'server-name' action
         parameter into ``self.redeploy_server`` and use it as
         ``self.node_filter``.
      3. Build a ``DrydockSession`` from the Drydock endpoint and store a
         ``DrydockClient`` on ``self.drydock_client``.

    Raises AirflowException if the dag is 'redeploy_server' and no
    server name is available.
    Raises DrydockClientUseFailureException if the endpoint has no
    hostname, or if the session or the client evaluates as falsy after
    construction.
    """
    # Logs uuid of action performed by the Operator
    LOG.info("DryDock Operator for action %s", self.action_info['id'])

    # Skip workflow if health checks on Drydock failed and continue-on-fail
    # option is turned on
    if self.xcom_puller.get_check_drydock_continue_on_fail():
        LOG.info("Skipping %s as health checks on Drydock have "
                 "failed and continue-on-fail option has been "
                 "turned on", self.__class__.__name__)

        # Set continue processing to False
        self.continue_processing = False

        return

    # Retrieve information of the server that we want to redeploy if user
    # executes the 'redeploy_server' dag
    # Set node filter to be the server that we want to redeploy
    if self.action_info['dag_id'] == 'redeploy_server':
        # Use .get() so that a missing 'parameters' or 'server-name'
        # entry reaches the AirflowException below instead of escaping
        # as a bare KeyError.
        parameters = self.action_info.get('parameters') or {}
        self.redeploy_server = parameters.get('server-name')

        if not self.redeploy_server:
            raise AirflowException('%s was unable to retrieve the '
                                   'server to be redeployed.'
                                   % self.__class__.__name__)

        LOG.info("Server to be redeployed is %s", self.redeploy_server)
        self.node_filter = self.redeploy_server

    # Retrieve Endpoint Information
    self.drydock_svc_endpoint = self.endpoints.endpoint_by_name(
        service_endpoint.DRYDOCK)

    LOG.info("Drydock endpoint is %s", self.drydock_svc_endpoint)

    # Parse DryDock Service Endpoint
    drydock_url = urlparse(self.drydock_svc_endpoint)

    # Assuming this is the standard-library urlparse: hostname is None
    # when the endpoint has no network location (e.g. the scheme is
    # missing). Fail here with a clear message instead of handing an
    # unusable host to the session.
    if not drydock_url.hostname:
        raise DrydockClientUseFailureException(
            "Unable to determine Drydock host from endpoint %s"
            % self.drydock_svc_endpoint)

    # Build a DrydockSession with credentials and target host
    # information.
    # The DrydockSession will care for TCP connection pooling
    # and header management
    LOG.info("Build DryDock Session")
    dd_session = session.DrydockSession(drydock_url.hostname,
                                        port=drydock_url.port,
                                        auth_gen=self._auth_gen)

    # Raise Exception if we are not able to set up the session
    if not dd_session:
        raise DrydockClientUseFailureException(
            "Failed to set up Drydock Session!")

    LOG.info("Successfully Set Up DryDock Session")

    # Use the DrydockSession to build a DrydockClient that can
    # be used to make one or more API calls
    LOG.info("Create DryDock Client")
    self.drydock_client = client.DrydockClient(dd_session)

    # Raise Exception if we are not able to build the client
    if not self.drydock_client:
        raise DrydockClientUseFailureException(
            "Failed to set up Drydock Client!")

    LOG.info("Successfully Set Up DryDock client")