Example #1
    def armada_base(self, context):

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Logs uuid of action performed by the Operator
        logging.info("Armada Operator for action %s", self.action_info['id'])

        # Retrieve Endpoint Information
        armada_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.armada_svc_type)

        # Set up armada client
        self.armada_client = self._init_armada_client(armada_svc_endpoint,
                                                      self.svc_token)

        # Retrieve DeckHand Endpoint Information
        deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        # Retrieve last committed revision id
        committed_revision_id = self.xcom_puller.get_design_version()

        # Get deckhand design reference url
        self.deckhand_design_ref = self._init_deckhand_design_ref(
            deckhand_svc_endpoint, committed_revision_id)
Example #2
    def promenade_base(self, context):
        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Logs uuid of Shipyard action
        logging.info("Executing Shipyard Action %s", self.action_info['id'])

        # Retrieve information of the server that we want to redeploy
        # if user executes the 'redeploy_server' dag
        if self.action_info['dag_id'] == 'redeploy_server':
            self.redeploy_server = self.action_info['parameters'].get(
                'server-name')

            if self.redeploy_server:
                logging.info("Server to be redeployed is %s",
                             self.redeploy_server)
            else:
                raise AirflowException('%s was unable to retrieve the '
                                       'server to be redeployed.' %
                                       self.__class__.__name__)

        # Retrieve promenade endpoint
        self.promenade_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.promenade_svc_type)

        logging.info("Promenade endpoint is %s", self.promenade_svc_endpoint)
Example #3
    def ucp_base(self, context):

        LOG.info("Running Airship Base Operator...")

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variable
        self.ucp_namespace = config.get('k8s_logs', 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.action_type = self.xcom_puller.get_action_type()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up other common-use values
        self.action_id = self.action_info['id']
        self.revision_id = self.action_info['committed_rev_id']
        self.action_params = self.action_info.get('parameters', {})
        self.design_ref = self._deckhand_design_ref()
        self._setup_target_nodes()
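
The XcomPuller/XcomPusher pair used throughout these examples wraps Airflow's XCom interface for the parent DAG. The actual Shipyard classes are not shown here; the following is only a rough sketch of the pattern, under the assumption that values are pulled from named tasks in the parent DAG (the 'action_xcom' task id is a hypothetical placeholder, while 'deckhand_get_design_version' does appear elsewhere in these examples).

# Illustrative sketch only -- not the real XcomPuller/XcomPusher classes.
class SimpleXcomPuller:
    def __init__(self, main_dag_name, task_instance):
        self._dag_id = main_dag_name
        self._ti = task_instance

    def get_action_info(self):
        # Pull the action dictionary pushed by an earlier task; the
        # 'action_xcom' task id is a hypothetical placeholder
        return self._ti.xcom_pull(task_ids='action_xcom',
                                  dag_id=self._dag_id)

    def get_design_version(self):
        # Pull the committed revision id pushed by the
        # 'deckhand_get_design_version' step
        return self._ti.xcom_pull(task_ids='deckhand_get_design_version',
                                  dag_id=self._dag_id)


class SimpleXcomPusher:
    def __init__(self, task_instance):
        self._ti = task_instance

    def xcom_push(self, key, value):
        # Push a key/value pair onto XCom for downstream steps
        self._ti.xcom_push(key=key, value=value)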
Example #4
    def ucp_base(self, context):

        LOG.info("Running Airship Base Operator...")

        # Configure the notes helper for this run of an operator
        # establishes self.notes_helper
        self._setup_notes_helper()

        # Initialize variable that indicates the kubernetes namespace for the
        # Airship components
        self.ucp_namespace = self.config.get(K8S_LOGS, 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.action_type = self.xcom_puller.get_action_type()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up other common-use values
        self.action_id = self.action_info['id']
        # extract the `task` or `step` name for easy access
        self.task_id = self.task_instance.task_id
        self.revision_id = self.action_info['committed_rev_id']
        self.action_params = self.action_info.get('parameters', {})
        self.design_ref = self._deckhand_design_ref()
        self._setup_target_nodes()
Example #5
    def deckhand_base(self, context):

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variables
        self.deckhand_client_read_timeout = int(config.get(
            'requests_config', 'deckhand_client_read_timeout'))

        self.validation_read_timeout = int(config.get(
            'requests_config', 'validation_read_timeout'))

        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Logs uuid of Shipyard action
        logging.info("Executing Shipyard Action %s",
                     self.action_info['id'])

        # Retrieve Endpoint Information
        self.deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        logging.info("Deckhand endpoint is %s",
                     self.deckhand_svc_endpoint)

        # Set up DeckHand Client
        logging.info("Setting up DeckHand Client...")

        # NOTE: The communication between the Airflow workers
        # and Deckhand happens via the 'internal' endpoint.
        self.deckhandclient = deckhand_client.Client(
            session=self.svc_session, endpoint_type='internal')

        if not self.deckhandclient:
            raise AirflowException('Failed to set up deckhand client!')

        # Retrieve 'revision_id' from xcom for tasks other than
        # 'deckhand_get_design_version'
        #
        # NOTE: In the case of 'deploy_site', the dag_id will
        # be 'deploy_site.deckhand_get_design_version' for the
        # 'deckhand_get_design_version' task. We need to extract
        # the xcom value from it in order to get the value of the
        # last committed revision ID
        if self.task_id != 'deckhand_get_design_version':

            # Retrieve 'revision_id' from xcom
            self.revision_id = self.xcom_puller.get_design_version()

            if self.revision_id:
                logging.info("Revision ID is %d", self.revision_id)
            else:
                raise AirflowException('Failed to retrieve Revision ID!')
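
The configparser calls above assume a [requests_config] section in shipyard.conf. A minimal sketch of the kind of configuration being parsed follows; the section and option names mirror the code above, while the values and the [k8s_logs] entry used elsewhere in these examples are placeholders, not real deployment settings.

# Sketch: parse an in-memory shipyard.conf fragment. Section and option
# names mirror the config.get() calls above; values are placeholders.
import configparser

SAMPLE_SHIPYARD_CONF = """
[requests_config]
deckhand_client_read_timeout = 300
validation_read_timeout = 300

[k8s_logs]
ucp_namespace = ucp
"""

config = configparser.ConfigParser()
config.read_string(SAMPLE_SHIPYARD_CONF)

deckhand_client_read_timeout = int(
    config.get('requests_config', 'deckhand_client_read_timeout'))
validation_read_timeout = int(
    config.get('requests_config', 'validation_read_timeout'))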
Example #6
    def execute(self, context):

        # Initialize variable
        ucp_components = [
            service_endpoint.ARMADA,
            service_endpoint.DECKHAND,
            service_endpoint.DRYDOCK,
            service_endpoint.PROMENADE,
            service_endpoint.SHIPYARD
        ]

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Loop through various Airship Components
        for component in ucp_components:

            # Retrieve Endpoint Information
            endpoint = self.endpoints.endpoint_by_name(component)
            LOG.info("%s endpoint is %s", component, endpoint)

            # Construct Health Check Endpoint
            healthcheck_endpoint = os.path.join(endpoint,
                                                'health')

            try:
                LOG.info("Performing Health Check on %s at %s", component,
                         healthcheck_endpoint)
                # Set health check timeout to 30 seconds
                req = requests.get(healthcheck_endpoint, timeout=30)

                # An empty response/body returned by a component means
                # that it is healthy
                if req.status_code == 204:
                    LOG.info("%s is alive and healthy", component)

            except requests.exceptions.RequestException as e:
                self.log_health_exception(component, e)
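
The health check above relies on the convention that a component answers 204 with an empty body when healthy. Below is a standalone sketch of that probe, separate from the operator; the endpoint URL in the trailing comment is hypothetical and shown only for illustration.

# Sketch: probe a component's /health endpoint. True only when the
# service answers 204 (healthy, empty body) within the timeout.
import os

import requests


def is_healthy(endpoint, timeout=30):
    healthcheck_endpoint = os.path.join(endpoint, 'health')
    try:
        resp = requests.get(healthcheck_endpoint, timeout=timeout)
        return resp.status_code == 204
    except requests.exceptions.RequestException:
        return False

# e.g. is_healthy('http://shipyard-int.ucp.svc.cluster.local:9000/api/v1.0')
# (hypothetical in-cluster URL)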
Example #7
    def ucp_base(self, context):

        LOG.info("Running UCP Base Operator...")

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variable
        self.ucp_namespace = config.get('k8s_logs', 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()
        self.revision_id = self.action_info['committed_rev_id']
Example #8
    def execute(self, context):

        # Initialize variable
        ucp_components = [
            'armada', 'deckhand', 'kubernetesprovisioner',
            'physicalprovisioner', 'shipyard'
        ]

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Loop through various UCP Components
        for component in ucp_components:

            # Retrieve Endpoint Information
            service_endpoint = ucp_service_endpoint(self, svc_type=component)
            LOG.info("%s endpoint is %s", component, service_endpoint)

            # Construct Health Check Endpoint
            healthcheck_endpoint = os.path.join(service_endpoint, 'health')

            LOG.info("%s healthcheck endpoint is %s", component,
                     healthcheck_endpoint)

            try:
                LOG.info("Performing Health Check on %s", component)
                # Set health check timeout to 30 seconds
                req = requests.get(healthcheck_endpoint, timeout=30)

                # An empty response/body returned by a component means
                # that it is healthy
                if req.status_code == 204:
                    LOG.info("%s is alive and healthy", component)

            except requests.exceptions.RequestException as e:
                self.log_health_exception(component, e)
Example #9
    def get_revision_id(self, task_instance):
        """Get the revision id from xcom"""
        if task_instance:
            LOG.debug("task_instance found, extracting design version")
            # Get XcomPuller instance
            self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
            # Set the revision_id to the revision on the xcom
            self.action_info = self.xcom_puller.get_action_info()

            revision_id = self.action_info['committed_rev_id']

            if revision_id:
                LOG.info("Revision is set to: %s for deployment-configuration",
                         revision_id)
                return revision_id
        # either revision id was not on xcom, or the task_instance is invalid
        raise AirflowException(
            "Design_revision is not set. Cannot proceed with retrieval of"
            " the design configuration"
        )
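
The action_info dictionary pulled from XCom in these examples carries the fields the operators read. A hedged sketch of its shape follows, limited to keys that actually appear in this code; every value shown is made up for illustration.

# Shape of the action_info XCom value as consumed by the operators above.
# Only keys referenced in these examples are listed; values are illustrative.
action_info = {
    'id': '01CASSSZT7CP1F0NKHCAJBCJGR',      # uuid/ulid of the action
    'dag_id': 'deploy_site',                 # workflow being executed
    'committed_rev_id': 3,                   # committed Deckhand revision
    'parameters': {                          # may be an empty dict
        'target_nodes': 'node1,node2',       # targeted actions only
        'continue-on-fail': 'false',         # health-check behaviour
        'server-name': 'node3',              # redeploy_server only
    },
}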
Example #10
class UcpBaseOperator(BaseOperator):
    """Airship Base Operator

    All Airship related workflow operators will use the Airship base
    operator as the parent and inherit attributes and methods
    from this class

    """
    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 pod_selector_pattern=None,
                 shipyard_conf=None,
                 start_time=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of UcpBaseOperator object.

        :param continue_processing: A boolean value on whether to continue
                                    with the workflow. Defaults to True.
        :param main_dag_name: Parent Dag
        :param pod_selector_pattern: A list containing the information on
                                     the patterns of the Pod name and name
                                     of the associated container for log
                                     queries. This will allow us to query
                                     multiple components, e.g. MAAS and
                                     Drydock at the same time. It also allows
                                     us to query the logs of specific container
                                     in Pods with multiple containers. For
                                     instance the Airflow worker pod contains
                                     both the airflow-worker container and the
                                     log-rotate container.
        :param shipyard_conf: Location of shipyard.conf
        :param start_time: Time when Operator gets executed
        :param xcom_push: xcom usage

        """

        super(UcpBaseOperator, self).__init__(*args, **kwargs)
        self.continue_processing = True
        self.main_dag_name = main_dag_name
        self.pod_selector_pattern = pod_selector_pattern or []
        self.shipyard_conf = shipyard_conf
        self.start_time = datetime.now()
        self.xcom_push_flag = xcom_push
        # lazy init field to hold a shipyard_db_engine
        self._shipyard_db_engine = None

    def execute(self, context):
        # Setup values that depend on the shipyard configuration
        self.doc_utils = _get_document_util(self.shipyard_conf)
        self.endpoints = service_endpoint.ServiceEndpoints(self.shipyard_conf)

        # Read and parse shipyard.conf
        self.config = configparser.ConfigParser()
        self.config.read(self.shipyard_conf)

        # Execute Airship base function
        self.ucp_base(context)

        # Execute base function for child operator
        self.run_base(context)

        if self.continue_processing:
            # Execute child function
            try:
                self.do_execute()
            except Exception:
                LOG.exception(
                    'Exception happened during %s execution, '
                    'will try to log additional details',
                    self.__class__.__name__)
                self.get_k8s_logs()
                if hasattr(self, 'fetch_failure_details'):
                    self.fetch_failure_details()
                raise

    def ucp_base(self, context):

        LOG.info("Running Airship Base Operator...")

        # Configure the notes helper for this run of an operator
        # establishes self.notes_helper
        self._setup_notes_helper()

        # Initialize variable that indicates the kubernetes namespace for the
        # Airship components
        self.ucp_namespace = self.config.get(K8S_LOGS, 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.action_type = self.xcom_puller.get_action_type()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up other common-use values
        self.action_id = self.action_info['id']
        # extract the `task` or `step` name for easy access
        self.task_id = self.task_instance.task_id
        self.revision_id = self.action_info['committed_rev_id']
        self.action_params = self.action_info.get('parameters', {})
        self.design_ref = self._deckhand_design_ref()
        self._setup_target_nodes()

    def get_k8s_logs(self):
        """Retrieve Kubernetes pod/container logs specified by an operator

        This method is "best effort" and should not prevent the progress of
        the workflow processing
        """
        if self.pod_selector_pattern:
            for selector in self.pod_selector_pattern:
                # Get difference in current time and time when the
                # operator was first executed (in seconds)
                t_diff = (datetime.now() - self.start_time).total_seconds()

                # Note that we will end up with a floating number for
                # 't_diff' and will need to round it up to the nearest
                # integer
                t_diff_int = int(math.ceil(t_diff))

                try:
                    get_pod_logs(selector['pod_pattern'], self.ucp_namespace,
                                 selector['container'], t_diff_int)

                except K8sLoggingException as e:
                    LOG.error(e)

        else:
            LOG.debug("There are no pod logs specified to retrieve")

    def _setup_target_nodes(self):
        """Sets up the target nodes field for this action

        When managing a targeted action, this step needs to resolve the
        target node. If there are no targets found (should be caught before
        invocation of the DAG), then raise an exception so that it does not
        try to take action on more nodes than targeted.
        Later, when creating the deployment group, if this value
        (self.target_nodes) is set, it will be used in lieu of the design
        based deployment strategy.
        target_nodes will be a comma separated string provided as part of the
        parameters to an action on input to Shipyard.
        """
        if self.action_type == 'targeted':
            t_nodes = self.action_params.get('target_nodes', '')
            self.target_nodes = [
                n.strip() for n in t_nodes.split(',') if n.strip()]
            if not self.target_nodes:
                raise AirflowException(
                    '{} ({}) requires targeted nodes, but was unable to '
                    'resolve any targets in {}'.format(
                        self.main_dag_name, self.action_id,
                        self.__class__.__name__))
            LOG.info("Target Nodes for action: [%s]",
                     ', '.join(self.target_nodes))
        else:
            self.target_nodes = None

    def _deckhand_design_ref(self):
        """Assemble a deckhand design_ref"""
        # Retrieve DeckHand Endpoint Information
        LOG.info("Assembling a design ref using revision: %s",
                 self.revision_id)
        deckhand_svc_endpoint = self.endpoints.endpoint_by_name(
            service_endpoint.DECKHAND)
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+{}".format(deckhand_svc_endpoint)
        design_ref = os.path.join(deckhand_path, "revisions",
                                  str(self.revision_id), "rendered-documents")
        LOG.info("Design Reference is %s", design_ref)
        return design_ref

    def get_unique_doc(self, schema, name, revision_id=None):
        """Retrieve a specific document from Deckhand

        :param schema: the schema of the document
        :param name: the metadata.name of the document
        :param revision_id: the deckhand revision, or defaults to
            self.revision_id
        Wraps the document_validation_utils call to get the same.
        Returns the specified document or raises an Airflow exception.
        """
        if revision_id is None:
            revision_id = self.revision_id

        LOG.info("Retrieve shipyard/DeploymentConfiguration/v1, "
                 "deployment-configuration from Deckhand")
        try:
            return self.doc_utils.get_unique_doc(revision_id=revision_id,
                                                 name=name,
                                                 schema=schema)
        except Exception as ex:
            LOG.error(
                "A document was expected to be available: Name: %s, "
                "Schema: %s, Deckhand revision: %s, but there was an "
                "error attempting to retrieve it. Since this document's "
                "contents may be critical to the proper operation of "
                "the workflow, this is fatal.", schema, name, revision_id)
            LOG.exception(ex)
            # if the document is not found for ANY reason, the workflow is
            # broken. Raise an Airflow Exception.
            raise AirflowException(ex)

    def _get_shipyard_db_engine(self):
        """Lazy initialize an engine for the Shipyard database.

        :returns: a SQLAlchemy engine for the Shipyard database.

        Developer's Note: Initially the idea was to use the PostgresHook and
        retrieve an engine from there as is done with the concurrency check,
        but since we have easy access to a configuration file, this uses
        SQLAlchemy directly to create the engine. By using the config, the
        database connection is not exposed as environment variables -- which
        is one way that Airflow registers database connections for use by
        the dbApiHook
        """
        if self._shipyard_db_engine is None:
            connection_string = self.config.get(BASE, 'postgresql_db')
            pool_size = self.config.getint(BASE, 'pool_size')
            max_overflow = self.config.getint(BASE, 'pool_overflow')
            pool_pre_ping = self.config.getboolean(BASE, 'pool_pre_ping')
            pool_recycle = self.config.getint(BASE, 'connection_recycle')
            pool_timeout = self.config.getint(BASE, 'pool_timeout')
            self._shipyard_db_engine = sqlalchemy.create_engine(
                connection_string,
                pool_size=pool_size,
                max_overflow=max_overflow,
                pool_pre_ping=pool_pre_ping,
                pool_recycle=pool_recycle,
                pool_timeout=pool_timeout)
            LOG.info(
                "Initialized Shipyard database connection with pool "
                "size: %d, max overflow: %d, pool pre ping: %s, pool "
                "recycle: %d, and pool timeout: %d", pool_size, max_overflow,
                pool_pre_ping, pool_recycle, pool_timeout)

        return self._shipyard_db_engine

    @shipyard_service_token
    def _token_getter(self):
        # Generator method to get a shipyard service token
        return self.svc_token

    def _setup_notes_helper(self):
        """Setup a notes helper for use by all descendent operators"""
        connect_timeout = self.config.get(REQUESTS_CONFIG,
                                          'notes_connect_timeout')
        read_timeout = self.config.get(REQUESTS_CONFIG, 'notes_read_timeout')
        self.notes_helper = NotesHelper(
            NotesManager(storage=ShipyardSQLNotesStorage(
                self._get_shipyard_db_engine),
                         get_token=self._token_getter,
                         connect_timeout=connect_timeout,
                         read_timeout=read_timeout))
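
For reference, the design_ref assembled by _deckhand_design_ref is simply the Deckhand endpoint prefixed with "deckhand+" and suffixed with the revision's rendered-documents path. A sketch with a hypothetical internal Deckhand endpoint and revision 3:

# Illustration of the design_ref assembly above; the endpoint URL is a
# hypothetical in-cluster address, only the join logic mirrors the operator.
import os

deckhand_svc_endpoint = 'http://deckhand-int.ucp.svc.cluster.local:9000/api/v1.0'
revision_id = 3

deckhand_path = "deckhand+{}".format(deckhand_svc_endpoint)
design_ref = os.path.join(deckhand_path, "revisions",
                          str(revision_id), "rendered-documents")
# design_ref is now:
# deckhand+http://deckhand-int.ucp.svc.cluster.local:9000/api/v1.0/revisions/3/rendered-documents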
Example #11
class DrydockBaseOperator(BaseOperator):
    """Drydock Base Operator

    All drydock related workflow operators will use the drydock
    base operator as the parent and inherit attributes and methods
    from this class

    """
    @apply_defaults
    def __init__(self,
                 deckhand_design_ref=None,
                 deckhand_svc_type='deckhand',
                 drydock_client=None,
                 drydock_svc_endpoint=None,
                 drydock_svc_type='physicalprovisioner',
                 drydock_task_id=None,
                 main_dag_name=None,
                 node_filter=None,
                 redeploy_server=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_session=None,
                 svc_token=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of DrydockBaseOperator object.

        :param deckhand_design_ref: A URI reference to the design documents
        :param deckhand_svc_type: Deckhand Service Type
        :param drydock_client: An instance of drydock client
        :param drydock_svc_endpoint: Drydock Service Endpoint
        :param drydock_svc_type: Drydock Service Type
        :param drydock_task_id: Drydock Task ID
        :param main_dag_name: Parent Dag
        :param node_filter: A filter for narrowing the scope of the task.
                            Valid fields are 'node_names', 'rack_names',
                            'node_tags'. Note that node filter is turned
                            off by default, i.e. all nodes will be deployed.
        :param redeploy_server: Server to be redeployed
        :param shipyard_conf: Location of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_session: Keystone Session
        :param svc_token: Keystone Token
        :param xcom_push: xcom usage

        The Drydock operator assumes that prior steps have set xcoms for
        the action and the deployment configuration

        """

        super(DrydockBaseOperator, self).__init__(*args, **kwargs)
        self.deckhand_design_ref = deckhand_design_ref
        self.deckhand_svc_type = deckhand_svc_type
        self.drydock_client = drydock_client
        self.drydock_svc_endpoint = drydock_svc_endpoint
        self.drydock_svc_type = drydock_svc_type
        self.drydock_task_id = drydock_task_id
        self.main_dag_name = main_dag_name
        self.node_filter = node_filter
        self.redeploy_server = redeploy_server
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_session = svc_session
        self.svc_token = svc_token
        self.xcom_push_flag = xcom_push

    def execute(self, context):

        # Execute drydock base function
        self.drydock_base(context)

        # Execute child function
        self.do_execute()

    def drydock_base(self, context):
        # Initialize Variables
        drydock_url = None
        dd_session = None

        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Logs uuid of action performed by the Operator
        logging.info("DryDock Operator for action %s", self.action_info['id'])

        # Retrieve information of the server that we want to redeploy if user
        # executes the 'redeploy_server' dag
        # Set node filter to be the server that we want to redeploy
        if self.action_info['dag_id'] == 'redeploy_server':
            self.redeploy_server = (
                self.action_info['parameters']['server-name'])

            if self.redeploy_server:
                logging.info("Server to be redeployed is %s",
                             self.redeploy_server)
                self.node_filter = self.redeploy_server
            else:
                raise AirflowException('Unable to retrieve information of '
                                       'node to be redeployed!')

        # Retrieve Endpoint Information
        self.drydock_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.drydock_svc_type)

        logging.info("Drydock endpoint is %s", self.drydock_svc_endpoint)

        # Parse DryDock Service Endpoint
        drydock_url = urlparse(self.drydock_svc_endpoint)

        # Build a DrydockSession with credentials and target host
        # information.
        # The DrydockSession will take care of TCP connection pooling
        # and header management
        logging.info("Build DryDock Session")
        dd_session = session.DrydockSession(drydock_url.hostname,
                                            port=drydock_url.port,
                                            auth_gen=self._auth_gen)

        # Raise Exception if we are not able to set up the session
        if dd_session:
            logging.info("Successfully Set Up DryDock Session")
        else:
            raise AirflowException("Failed to set up Drydock Session!")

        # Use the DrydockSession to build a DrydockClient that can
        # be used to make one or more API calls
        logging.info("Create DryDock Client")
        self.drydock_client = client.DrydockClient(dd_session)

        # Raise Exception if we are not able to build the client
        if self.drydock_client:
            logging.info("Successfully Set Up DryDock client")
        else:
            raise AirflowException("Failed to set up Drydock Client!")

        # Retrieve DeckHand Endpoint Information
        deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        logging.info("Deckhand endpoint is %s", deckhand_svc_endpoint)

        # Retrieve last committed revision id
        committed_revision_id = self.xcom_puller.get_design_version()

        # Form DeckHand Design Reference Path
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+" + deckhand_svc_endpoint
        self.deckhand_design_ref = os.path.join(deckhand_path, "revisions",
                                                str(committed_revision_id),
                                                "rendered-documents")
        if self.deckhand_design_ref:
            logging.info("Design YAMLs will be retrieved from %s",
                         self.deckhand_design_ref)
        else:
            raise AirflowException("Unable to Retrieve Design Reference!")

    @shipyard_service_token
    def _auth_gen(self):
        # Generator method for the Drydock Session to use to get the
        # auth headers necessary
        return [('X-Auth-Token', self.svc_token)]

    def create_task(self, task_action):

        # Initialize Variables
        create_task_response = {}

        # Node Filter
        logging.info("Nodes Filter List: %s", self.node_filter)

        try:
            # Create Task
            create_task_response = self.drydock_client.create_task(
                design_ref=self.deckhand_design_ref,
                task_action=task_action,
                node_filter=self.node_filter)

        except errors.ClientError as client_error:
            raise AirflowException(client_error)

        # Retrieve Task ID
        self.drydock_task_id = create_task_response['task_id']
        logging.info('Drydock %s task ID is %s', task_action,
                     self.drydock_task_id)

        # Raise Exception if we are not able to get the task_id from
        # Drydock
        if self.drydock_task_id:
            return self.drydock_task_id
        else:
            raise AirflowException("Unable to create task!")

    def query_task(self, interval, time_out):

        # Calculate the number of times to execute the 'for' loop
        # Convert 'time_out' and 'interval' from string into integer
        # The result of the division is a floating point number, which
        # we round to the nearest whole number
        end_range = round(int(time_out) / int(interval))

        logging.info('Task ID is %s', self.drydock_task_id)

        # Initialize task state so that a failed first status query does
        # not leave these variables undefined below
        task_status = None
        task_result = None

        # Query task status
        for i in range(0, end_range + 1):
            try:
                # Retrieve current task state
                task_state = self.drydock_client.get_task(
                    task_id=self.drydock_task_id)

                task_status = task_state['status']
                task_result = task_state['result']['status']

                logging.info("Current status of task id %s is %s",
                             self.drydock_task_id, task_status)

            except errors.ClientError as client_error:
                raise AirflowException(client_error)

            except Exception:
                # There can be situations where there are intermittent network
                # issues that prevents us from retrieving the task state. We
                # will want to retry in such situations.
                logging.warning("Unable to retrieve task state. Retrying...")

            # Raise Time Out Exception
            if task_status == 'running' and i == end_range:
                self.task_failure(False)

            # Exit 'for' loop if the task is in 'complete' or 'terminated'
            # state
            if task_status in ['complete', 'terminated']:
                logging.info('Task result is %s', task_result)
                break
            else:
                time.sleep(int(interval))

        # Get final task result
        if task_result == 'success':
            logging.info('Task id %s has been successfully completed',
                         self.drydock_task_id)
        else:
            self.task_failure(True)

    def task_failure(self, _task_failure):

        logging.info('Retrieving all tasks records from Drydock...')

        try:
            # Get all tasks records
            all_tasks = self.drydock_client.get_tasks()

            # Create a dictionary of tasks records with 'task_id' as key
            all_task_ids = {t['task_id']: t for t in all_tasks}

        except errors.ClientError as client_error:
            raise AirflowException(client_error)

        # Retrieve the failed parent task and assign it to list
        failed_task = ([
            x for x in all_tasks if x['task_id'] == self.drydock_task_id
        ])

        # Print detailed information of failed parent task in json output
        # Since there is only 1 failed parent task, we will print index 0
        # of the list
        if failed_task:
            logging.error('%s task has either failed or timed out',
                          failed_task[0]['action'])

            logging.error(json.dumps(failed_task[0], indent=4, sort_keys=True))

        # Get the list of subtasks belonging to the failed parent task
        subtask_id_list = failed_task[0]['subtask_id_list']

        logging.info("Printing information of failed sub-tasks...")

        # Print detailed information of failed step(s) under each subtask
        # This will help to provide additional information for troubleshooting
        # purpose.
        for subtask_id in subtask_id_list:

            logging.info("Retrieving details of subtask %s...", subtask_id)

            # Retrieve task information
            task = all_task_ids.get(subtask_id)

            if task:
                # Print subtask action and state
                logging.info("%s subtask is in %s state", task['action'],
                             task['result']['status'])

                # Print list containing steps in failure state
                if task['result']['failures']:
                    logging.error("The following steps have failed:")
                    logging.error(task['result']['failures'])

                    message_list = (task['result']['details']['messageList']
                                    or [])

                    # Print information of failed steps
                    for message in message_list:
                        is_error = message['error'] is True

                        if is_error:
                            logging.error(
                                json.dumps(message, indent=4, sort_keys=True))
                else:
                    logging.info("No failed step detected for subtask %s",
                                 subtask_id)

            else:
                raise AirflowException("Unable to retrieve subtask info!")

        # Raise Exception to terminate workflow
        if _task_failure:
            raise AirflowException("Failed to Execute/Complete Task!")
        else:
            raise AirflowException("Task Execution Timed Out!")
Example #12
    def drydock_base(self, context):
        # Initialize Variables
        drydock_url = None
        dd_session = None

        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Logs uuid of action performed by the Operator
        logging.info("DryDock Operator for action %s", self.action_info['id'])

        # Retrieve information of the server that we want to redeploy if user
        # executes the 'redeploy_server' dag
        # Set node filter to be the server that we want to redeploy
        if self.action_info['dag_id'] == 'redeploy_server':
            self.redeploy_server = (
                self.action_info['parameters']['server-name'])

            if self.redeploy_server:
                logging.info("Server to be redeployed is %s",
                             self.redeploy_server)
                self.node_filter = self.redeploy_server
            else:
                raise AirflowException('Unable to retrieve information of '
                                       'node to be redeployed!')

        # Retrieve Endpoint Information
        self.drydock_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.drydock_svc_type)

        logging.info("Drydock endpoint is %s", self.drydock_svc_endpoint)

        # Parse DryDock Service Endpoint
        drydock_url = urlparse(self.drydock_svc_endpoint)

        # Build a DrydockSession with credentials and target host
        # information.
        # The DrydockSession will take care of TCP connection pooling
        # and header management
        logging.info("Build DryDock Session")
        dd_session = session.DrydockSession(drydock_url.hostname,
                                            port=drydock_url.port,
                                            auth_gen=self._auth_gen)

        # Raise Exception if we are not able to set up the session
        if dd_session:
            logging.info("Successfully Set Up DryDock Session")
        else:
            raise AirflowException("Failed to set up Drydock Session!")

        # Use the DrydockSession to build a DrydockClient that can
        # be used to make one or more API calls
        logging.info("Create DryDock Client")
        self.drydock_client = client.DrydockClient(dd_session)

        # Raise Exception if we are not able to build the client
        if self.drydock_client:
            logging.info("Successfully Set Up DryDock client")
        else:
            raise AirflowException("Failed to set up Drydock Client!")

        # Retrieve DeckHand Endpoint Information
        deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        logging.info("Deckhand endpoint is %s", deckhand_svc_endpoint)

        # Retrieve last committed revision id
        committed_revision_id = self.xcom_puller.get_design_version()

        # Form DeckHand Design Reference Path
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+" + deckhand_svc_endpoint
        self.deckhand_design_ref = os.path.join(deckhand_path, "revisions",
                                                str(committed_revision_id),
                                                "rendered-documents")
        if self.deckhand_design_ref:
            logging.info("Design YAMLs will be retrieved from %s",
                         self.deckhand_design_ref)
        else:
            raise AirflowException("Unable to Retrieve Design Reference!")
Example #13
class UcpBaseOperator(BaseOperator):
    """UCP Base Operator

    All UCP related workflow operators will use the UCP base
    operator as the parent and inherit attributes and methods
    from this class

    """
    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 pod_selector_pattern=None,
                 shipyard_conf=None,
                 start_time=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of UcpBaseOperator object.

        :param continue_processing: A boolean value on whether to continue
                                    with the workflow. Defaults to True.
        :param main_dag_name: Parent Dag
        :param pod_selector_pattern: A list containing the information on
                                     the patterns of the Pod name and name
                                     of the associated container for log
                                     queries. This will allow us to query
                                     multiple components, e.g. MAAS and
                                     Drydock at the same time. It also allows
                                     us to query the logs of specific container
                                     in Pods with multiple containers. For
                                     instance the Airflow worker pod contains
                                     both the airflow-worker container and the
                                     log-rotate container.
        :param shipyard_conf: Location of shipyard.conf
        :param start_time: Time when Operator gets executed
        :param xcom_push: xcom usage

        """

        super(UcpBaseOperator, self).__init__(*args, **kwargs)
        self.continue_processing = True
        self.main_dag_name = main_dag_name
        self.pod_selector_pattern = pod_selector_pattern or []
        self.shipyard_conf = shipyard_conf
        self.start_time = datetime.now()
        self.xcom_push_flag = xcom_push
        self.doc_utils = _get_document_util(self.shipyard_conf)
        self.endpoints = service_endpoint.ServiceEndpoints(self.shipyard_conf)

    def execute(self, context):

        # Execute UCP base function
        self.ucp_base(context)

        # Execute base function
        self.run_base(context)

        if self.continue_processing:
            # Execute child function
            self.do_execute()

    def ucp_base(self, context):

        LOG.info("Running UCP Base Operator...")

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variable
        self.ucp_namespace = config.get('k8s_logs', 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()
        self.revision_id = self.action_info['committed_rev_id']
        self.design_ref = self._deckhand_design_ref()

    def get_k8s_logs(self):
        """Retrieve Kubernetes pod/container logs specified by an opererator

        This method is "best effort" and should not prevent the progress of
        the workflow processing
        """
        if self.pod_selector_pattern:
            for selector in self.pod_selector_pattern:
                # Get difference in current time and time when the
                # operator was first executed (in seconds)
                t_diff = (datetime.now() - self.start_time).total_seconds()

                # Note that we will end up with a floating number for
                # 't_diff' and will need to round it up to the nearest
                # integer
                t_diff_int = int(math.ceil(t_diff))

                try:
                    get_pod_logs(selector['pod_pattern'], self.ucp_namespace,
                                 selector['container'], t_diff_int)

                except K8sLoggingException as e:
                    LOG.error(e)

        else:
            LOG.debug("There are no pod logs specified to retrieve")

    def _deckhand_design_ref(self):
        """Assemble a deckhand design_ref"""
        # Retrieve DeckHand Endpoint Information
        LOG.info("Assembling a design ref using revision: %s",
                 self.revision_id)
        deckhand_svc_endpoint = self.endpoints.endpoint_by_name(
            service_endpoint.DECKHAND)
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+{}".format(deckhand_svc_endpoint)
        design_ref = os.path.join(deckhand_path, "revisions",
                                  str(self.revision_id), "rendered-documents")
        LOG.info("Design Reference is %s", design_ref)
        return design_ref

    def get_unique_doc(self, schema, name, revision_id=None):
        """Retrieve a specific document from Deckhand

        :param schema: the schema of the document
        :param name: the metadata.name of the document
        :param revision_id: the deckhand revision, or defaults to
            self.revision_id
        Wraps the document_validation_utils call to get the same.
        Returns the specified document or raises an Airflow exception.
        """
        if revision_id is None:
            revision_id = self.revision_id

        LOG.info("Retrieve shipyard/DeploymentConfiguration/v1, "
                 "deployment-configuration from Deckhand")
        try:
            return self.doc_utils.get_unique_doc(revision_id=revision_id,
                                                 name=name,
                                                 schema=schema)
        except Exception as ex:
            LOG.error(
                "A document was expected to be available: Name: %s, "
                "Schema: %s, Deckhand revision: %s, but there was an "
                "error attempting to retrieve it. Since this document's "
                "contents may be critical to the proper operation of "
                "the workflow, this is fatal.", schema, name, revision_id)
            LOG.exception(ex)
            # if the document is not found for ANY reason, the workflow is
            # broken. Raise an Airflow Exception.
            raise AirflowException(ex)
Example #14
class UcpHealthCheckOperator(BaseOperator):
    """
    UCP Health Checks
    """
    @apply_defaults
    def __init__(self,
                 shipyard_conf=None,
                 main_dag_name=None,
                 xcom_push=True,
                 *args,
                 **kwargs):

        super(UcpHealthCheckOperator, self).__init__(*args, **kwargs)
        self.shipyard_conf = shipyard_conf
        self.main_dag_name = main_dag_name
        self.xcom_push_flag = xcom_push

    def execute(self, context):

        # Initialize variable
        ucp_components = [
            'armada', 'deckhand', 'kubernetesprovisioner',
            'physicalprovisioner', 'shipyard'
        ]

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Loop through various UCP Components
        for component in ucp_components:

            # Retrieve Endpoint Information
            service_endpoint = ucp_service_endpoint(self, svc_type=component)
            LOG.info("%s endpoint is %s", component, service_endpoint)

            # Construct Health Check Endpoint
            healthcheck_endpoint = os.path.join(service_endpoint, 'health')

            LOG.info("%s healthcheck endpoint is %s", component,
                     healthcheck_endpoint)

            try:
                LOG.info("Performing Health Check on %s", component)
                # Set health check timeout to 30 seconds
                req = requests.get(healthcheck_endpoint, timeout=30)

                # An empty response/body returned by a component means
                # that it is healthy
                if req.status_code == 204:
                    LOG.info("%s is alive and healthy", component)

            except requests.exceptions.RequestException as e:
                self.log_health_exception(component, e)

    def log_health_exception(self, component, error_messages):
        """Logs Exceptions for health check
        """
        # If Drydock health check fails and continue-on-fail, continue
        # and create xcom key 'drydock_continue_on_fail'
        if (component == 'physicalprovisioner' and
                self.action_info['parameters'].get(
                    'continue-on-fail', 'false').lower() == 'true' and
                self.action_info['dag_id'] in ['update_site', 'deploy_site']):
            LOG.warning('Drydock did not pass health check. Continuing '
                        'as "continue-on-fail" option is enabled.')
            self.xcom_pusher.xcom_push(key='drydock_continue_on_fail',
                                       value=True)

        else:
            LOG.error(error_messages)
            raise AirflowException(
                "Health check failed for %s component on "
                "dag_id=%s. Details: %s" %
                (component, self.action_info.get('dag_id'), error_messages))
Example #15
class ArmadaBaseOperator(BaseOperator):
    """Armada Base Operator

    All armada related workflow operators will use the armada
    base operator as the parent and inherit attributes and methods
    from this class

    """
    @apply_defaults
    def __init__(self,
                 armada_svc_type='armada',
                 deckhand_svc_type='deckhand',
                 main_dag_name=None,
                 query=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_session=None,
                 svc_token=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of ArmadaBaseOperator object.

        :param armada_svc_type: Armada Service Type
        :param deckhand_svc_type: Deckhand Service Type
        :param main_dag_name: Parent Dag
        :param query: A dictionary containing explicit query string parameters
        :param shipyard_conf: Location of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_session: Keystone Session
        :param svc_token: Keystone Token
        :param xcom_push: xcom usage

        The Armada operator assumes that prior steps have set xcoms for
        the action and the deployment configuration

        """

        super(ArmadaBaseOperator, self).__init__(*args, **kwargs)
        self.armada_svc_type = armada_svc_type
        self.deckhand_svc_type = deckhand_svc_type
        self.main_dag_name = main_dag_name
        self.query = query or {}
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_session = svc_session
        self.svc_token = svc_token
        self.xcom_push_flag = xcom_push

    def execute(self, context):

        # Execute armada base function
        self.armada_base(context)

        # Execute child function
        self.do_execute()

    @shipyard_service_token
    def armada_base(self, context):

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Logs uuid of action performed by the Operator
        logging.info("Armada Operator for action %s", self.action_info['id'])

        # Retrieve Endpoint Information
        armada_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.armada_svc_type)

        # Set up armada client
        self.armada_client = self._init_armada_client(armada_svc_endpoint,
                                                      self.svc_token)

        # Retrieve DeckHand Endpoint Information
        deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        # Retrieve last committed revision id
        committed_revision_id = self.xcom_puller.get_design_version()

        # Get deckhand design reference url
        self.deckhand_design_ref = self._init_deckhand_design_ref(
            deckhand_svc_endpoint, committed_revision_id)

    @staticmethod
    def _init_armada_client(armada_svc_endpoint, svc_token):

        logging.info("Armada endpoint is %s", armada_svc_endpoint)

        # Parse Armada Service Endpoint
        armada_url = urlparse(armada_svc_endpoint)

        # Build an ArmadaSession with credentials and target host
        # information.
        logging.info("Build Armada Session")
        a_session = session.ArmadaSession(host=armada_url.hostname,
                                          port=armada_url.port,
                                          scheme='http',
                                          token=svc_token,
                                          marker=None)

        # Raise Exception if we are not able to set up the session
        if a_session:
            logging.info("Successfully Set Up Armada Session")
        else:
            raise AirflowException("Failed to set up Armada Session!")

        # Use the ArmadaSession to build an ArmadaClient that can
        # be used to make one or more API calls
        logging.info("Create Armada Client")
        _armada_client = client.ArmadaClient(a_session)

        # Raise Exception if we are not able to build armada client
        if _armada_client:
            logging.info("Successfully Set Up Armada client")

            return _armada_client
        else:
            raise AirflowException("Failed to set up Armada client!")

    @staticmethod
    def _init_deckhand_design_ref(deckhand_svc_endpoint,
                                  committed_revision_id):

        logging.info("Deckhand endpoint is %s", deckhand_svc_endpoint)

        # Form DeckHand Design Reference Path
        # This URL will be used to retrieve the Site Design YAMLs
        deckhand_path = "deckhand+" + deckhand_svc_endpoint
        _deckhand_design_ref = os.path.join(deckhand_path, "revisions",
                                            str(committed_revision_id),
                                            "rendered-documents")

        if _deckhand_design_ref:
            logging.info("Design YAMLs will be retrieved from %s",
                         _deckhand_design_ref)

            return _deckhand_design_ref
        else:
            raise AirflowException("Unable to Retrieve Design Reference!")

    @get_pod_port_ip('tiller', namespace='kube-system')
    def get_tiller_info(self, pods_ip_port={}):

        # Assign value to the 'query' dictionary so that we can pass
        # it via the Armada Client
        self.query['tiller_host'] = pods_ip_port['tiller']['ip']
        self.query['tiller_port'] = pods_ip_port['tiller']['port']
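
A note on the constructor above: the query argument defaults to None rather than a literal {} because a mutable default is created once and then shared by every call that relies on it; with get_tiller_info mutating self.query, instances could otherwise leak tiller details into each other. A small sketch of the pitfall:

# Sketch of the mutable-default pitfall the None sentinel avoids.
class Pitfall:
    def __init__(self, query={}):       # one dict object shared by all calls
        self.query = query


class Safer:
    def __init__(self, query=None):     # fresh dict per instance
        self.query = query or {}


a, b = Pitfall(), Pitfall()
a.query['tiller_host'] = '10.0.0.1'
print(b.query)                          # {'tiller_host': '10.0.0.1'} -- shared

c, d = Safer(), Safer()
c.query['tiller_host'] = '10.0.0.1'
print(d.query)                          # {} -- isolated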
Example #16
class UcpHealthCheckOperator(BaseOperator):
    """
    Airship Health Checks
    """

    @apply_defaults
    def __init__(self,
                 shipyard_conf=None,
                 main_dag_name=None,
                 xcom_push=True,
                 *args,
                 **kwargs):

        super(UcpHealthCheckOperator, self).__init__(*args, **kwargs)
        self.shipyard_conf = shipyard_conf
        self.main_dag_name = main_dag_name
        self.xcom_push_flag = xcom_push
        self.endpoints = service_endpoint.ServiceEndpoints(self.shipyard_conf)

    def execute(self, context):

        # Initialize variable
        ucp_components = [
            service_endpoint.ARMADA,
            service_endpoint.DECKHAND,
            service_endpoint.DRYDOCK,
            service_endpoint.PROMENADE,
            service_endpoint.SHIPYARD
        ]

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Set up xcom_pusher to push values to xcom
        self.xcom_pusher = XcomPusher(self.task_instance)

        # Loop through various Airship Components
        for component in ucp_components:

            # Retrieve Endpoint Information
            endpoint = self.endpoints.endpoint_by_name(component)
            LOG.info("%s endpoint is %s", component, endpoint)

            # Construct Health Check Endpoint
            healthcheck_endpoint = os.path.join(endpoint,
                                                'health')

            try:
                LOG.info("Performing Health Check on %s at %s", component,
                         healthcheck_endpoint)
                # Set health check timeout to 30 seconds
                req = requests.get(healthcheck_endpoint, timeout=30)

                # An empty response/body returned by a component means
                # that it is healthy
                if req.status_code == 204:
                    LOG.info("%s is alive and healthy", component)

            except requests.exceptions.RequestException as e:
                self.log_health_exception(component, e)

    def log_health_exception(self, component, error_messages):
        """Logs Exceptions for health check
        """
        # If Drydock health check fails and continue-on-fail, continue
        # and create xcom key 'drydock_continue_on_fail'
        # Note that 'update_software' does not interact with Drydock, and
        # therefore does not use the continue-on-fail option.
        if (component == service_endpoint.DRYDOCK and
                self.action_info['parameters'].get(
                    'continue-on-fail', 'false').lower() == 'true' and
                self.action_info['dag_id'] in ['update_site', 'deploy_site']):
            LOG.warning('Drydock did not pass health check. Continuing '
                        'as "continue-on-fail" option is enabled.')
            self.xcom_pusher.xcom_push(key='drydock_continue_on_fail',
                                       value=True)

        else:
            LOG.error(error_messages)
            raise AirflowException("Health check failed for %s component on "
                                   "dag_id=%s. Details: %s" %
                                   (component, self.action_info.get('dag_id'),
                                    error_messages))
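
As a minimal, self-contained sketch of the request pattern used in execute above (the endpoint URL is invented for illustration), a component is treated as healthy when its health endpoint answers with an empty 204 response within the timeout.

import logging
import os

import requests

LOG = logging.getLogger(__name__)


def is_component_healthy(endpoint, timeout=30):
    # Mirror the operator above: an empty 204 reply from the component's
    # health endpoint means it is healthy
    healthcheck_endpoint = os.path.join(endpoint, 'health')
    try:
        resp = requests.get(healthcheck_endpoint, timeout=timeout)
        return resp.status_code == 204
    except requests.exceptions.RequestException as ex:
        LOG.error("Health check failed at %s: %s", healthcheck_endpoint, ex)
        return False


# 'http://shipyard-int:9000/api/v1.0' is an invented endpoint for this sketch
if is_component_healthy('http://shipyard-int:9000/api/v1.0'):
    LOG.info("Component is alive and healthy")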
Example #17
class DeploymentConfigurationOperator(BaseOperator):
    """Deployment Configuration Operator

    Retrieve the deployment-configuration from Deckhand for use throughout
    the workflow. Put the configuration into a dictionary.

    Failures are raised:
      -  when Deckhand cannot be contacted
      -  when the DeploymentConfiguration (deployment-configuration) document
         cannot be retrieved
    """
    config_keys_defaults = {
        "physical_provisioner.deployment_strategy": None,
        "physical_provisioner.deploy_interval": 30,
        "physical_provisioner.deploy_timeout": 3600,
        "physical_provisioner.destroy_interval": 30,
        "physical_provisioner.destroy_timeout": 900,
        "physical_provisioner.join_wait": 120,
        "physical_provisioner.prepare_node_interval": 30,
        "physical_provisioner.prepare_node_timeout": 1800,
        "physical_provisioner.prepare_site_interval": 10,
        "physical_provisioner.prepare_site_timeout": 300,
        "physical_provisioner.verify_interval": 10,
        "physical_provisioner.verify_timeout": 60,
        "physical_provisioner.relabel_nodes_interval": 30,
        "physical_provisioner.relabel_nodes_timeout": 900,
        "kubernetes.node_status_interval": 30,
        "kubernetes.node_status_timeout": 1800,
        "kubernetes_provisioner.drain_timeout": 3600,
        "kubernetes_provisioner.drain_grace_period": 1800,
        "kubernetes_provisioner.clear_labels_timeout": 1800,
        "kubernetes_provisioner.remove_etcd_timeout": 1800,
        "kubernetes_provisioner.etcd_ready_timeout": 600,
        "armada.get_releases_timeout": 300,
        "armada.get_status_timeout": 300,
        "armada.manifest": "full-site",
        "armada.post_apply_timeout": 2700,
        "armada.validate_design_timeout": 600
    }

    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 shipyard_conf=None,
                 *args, **kwargs):
        """Deployment Configuration Operator

        Generate a DeploymentConfigurationOperator to read the deployment's
        configuration for use by other operators

        :param main_dag_name: Parent Dag
        :param shipyard_conf: Location of shipyard.conf
        """
        super(DeploymentConfigurationOperator, self).__init__(*args, **kwargs)
        self.main_dag_name = main_dag_name
        self.shipyard_conf = shipyard_conf
        self.action_info = {}

    def _read_config(self):
        """Read in and parse the shipyard config"""
        self.config = configparser.ConfigParser()
        self.config.read(self.shipyard_conf)

    def execute(self, context):
        """Perform Deployment Configuration extraction"""
        self._read_config()
        revision_id = self.get_revision_id(context.get('task_instance'))
        doc = self.get_doc(revision_id)
        converted = self.map_config_keys(doc)

        # return the mapped configuration so that it can be placed on xcom
        return converted

    def get_revision_id(self, task_instance):
        """Get the revision id from xcom"""
        if task_instance:
            LOG.debug("task_instance found, extracting design version")
            # Get XcomPuller instance
            self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
            # Set the revision_id to the revision on the xcom
            self.action_info = self.xcom_puller.get_action_info()

            revision_id = self.action_info['committed_rev_id']

            if revision_id:
                LOG.info("Revision is set to: %s for deployment-configuration",
                         revision_id)
                return revision_id
        # Either the revision id was not on xcom, or the task_instance is
        # missing or invalid
        raise AirflowException(
            "Design_revision is not set. Cannot proceed with retrieval of"
            " the design configuration"
        )

    def get_doc(self, revision_id):
        """Get the DeploymentConfiguration document dictionary from Deckhand"""
        schema_fallback = 'shipyard/DeploymentConfiguration/v1'
        schema = self.config.get(DOCUMENT_INFO,
                                 'deployment_configuration_schema',
                                 fallback=schema_fallback)
        name = self.config.get(DOCUMENT_INFO,
                               'deployment_configuration_name',
                               fallback='deployment-configuration')
        LOG.info("Attempting to retrieve {}, {} from Deckhand".format(schema,
                                                                      name))
        filters = {"schema": schema, "metadata.name": name}

        # Create additional headers dict to pass context marker
        # and end user
        addl_headers = None
        if self.action_info:
            context_marker = self.action_info['context_marker']
            end_user = self.action_info['user']
            addl_headers = {
                CustomHeaders.CONTEXT_MARKER.value: context_marker,
                CustomHeaders.END_USER.value: end_user
            }

        try:
            dhclient = DeckhandClientFactory(
                self.shipyard_conf).get_client(addl_headers=addl_headers)
            LOG.info("Deckhand Client acquired")
            doc = dhclient.revisions.documents(revision_id,
                                               rendered=True,
                                               **filters)
        except Exception as ex:
            try:
                failed_url = ex.url
            except AttributeError:
                failed_url = "No URL generated"
            LOG.exception(ex)
            raise AirflowException("Failed to retrieve deployment-"
                                   "configuration yaml using url: "
                                   "{}".format(failed_url))

        if len(doc) == 1 and doc[0].data:
            doc_dict = doc[0].data
        else:
            raise AirflowException("A valid deployment-configuration is "
                                   "required")

        LOG.info("DeploymentConfiguration retrieved")
        return doc_dict

    def map_config_keys(self, cfg_data):
        """Maps the deployment-configuration

        Converts to a more simple map of key-value pairs
        """
        LOG.info("Mapping keys from deployment-configuration")
        return {
            cfg_key: self.get_cfg_value(cfg_data, cfg_key, cfg_default)
            for cfg_key, cfg_default in
            DeploymentConfigurationOperator.config_keys_defaults.items()
        }

    def get_cfg_value(self, cfg_data, cfg_key, cfg_default):
        """Uses the dot notation key to get the value from the design config"""
        data = cfg_data
        for node in cfg_key.split('.'):
            data = data.get(node, {})
        if data:
            LOG.info("Deployment Config value set- %s: %s", cfg_key, data)
            return data
        else:
            LOG.info("Deployment Config using default- %s: %s",
                     cfg_key, cfg_default)
            return cfg_default
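
To make the dot-notation lookup in get_cfg_value concrete, here is a small standalone sketch; the configuration fragment below is invented for illustration.

# Invented fragment of a deployment-configuration document
sample_cfg = {
    "physical_provisioner": {
        "deploy_timeout": 7200
    }
}


def lookup(cfg_data, cfg_key, cfg_default):
    data = cfg_data
    for node in cfg_key.split('.'):
        # A missing intermediate key yields {}, which falls through
        # to the configured default below
        data = data.get(node, {})
    return data if data else cfg_default


assert lookup(sample_cfg, "physical_provisioner.deploy_timeout", 3600) == 7200
assert lookup(sample_cfg, "armada.manifest", "full-site") == "full-site"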
Example #18
class PromenadeBaseOperator(BaseOperator):
    """Promenade Base Operator

    All promenade related workflow operators will use the promenade
    base operator as the parent and inherit attributes and methods
    from this class
    """
    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 promenade_svc_endpoint=None,
                 promenade_svc_type='kubernetesprovisioner',
                 redeploy_server=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_token=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of PromenadeBaseOperator object.

        :param main_dag_name: Parent Dag
        :param promenade_svc_endpoint: Promenade Service Endpoint
        :param promenade_svc_type: Promenade Service Type
        :param redeploy_server: Server to be redeployed
        :param shipyard_conf: Path of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_token: Keystone Token
        :param xcom_push: xcom usage

        The Promenade operator assumes that prior steps have set xcoms for
        the action and the deployment configuration
        """

        super(PromenadeBaseOperator, self).__init__(*args, **kwargs)
        self.main_dag_name = main_dag_name
        self.promenade_svc_endpoint = promenade_svc_endpoint
        self.promenade_svc_type = promenade_svc_type
        self.redeploy_server = redeploy_server
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_token = svc_token
        self.xcom_push_flag = xcom_push

    def execute(self, context):
        # Execute promenade base function
        self.promenade_base(context)

        # Execute child function
        self.do_execute()

    @shipyard_service_token
    def promenade_base(self, context):
        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()

        # Logs uuid of Shipyard action
        logging.info("Executing Shipyard Action %s", self.action_info['id'])

        # Retrieve information of the server that we want to redeploy
        # if user executes the 'redeploy_server' dag
        if self.action_info['dag_id'] == 'redeploy_server':
            self.redeploy_server = self.action_info['parameters'].get(
                'server-name')

            if self.redeploy_server:
                logging.info("Server to be redeployed is %s",
                             self.redeploy_server)
            else:
                raise AirflowException('%s was unable to retrieve the '
                                       'server to be redeployed.' %
                                       self.__class__.__name__)

        # Retrieve promenade endpoint
        self.promenade_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.promenade_svc_type)

        logging.info("Promenade endpoint is %s", self.promenade_svc_endpoint)
Example #19
class UcpBaseOperator(BaseOperator):
    """UCP Base Operator

    All UCP related workflow operators will use the UCP base
    operator as the parent and inherit attributes and methods
    from this class

    """
    @apply_defaults
    def __init__(self,
                 main_dag_name=None,
                 pod_selector_pattern=None,
                 shipyard_conf=None,
                 start_time=None,
                 sub_dag_name=None,
                 xcom_push=True,
                 *args,
                 **kwargs):
        """Initialization of UcpBaseOperator object.

        :param continue_processing: A boolean value on whether to continue
                                    with the workflow. Defaults to True.
        :param main_dag_name: Parent Dag
        :param pod_selector_pattern: A list containing the information on
                                     the patterns of the Pod name and name
                                     of the associated container for log
                                     queries. This will allow us to query
                                     multiple components, e.g. MAAS and
                                     Drydock at the same time. It also allows
                                     us to query the logs of specific container
                                     in Pods with multiple containers. For
                                     instance the Airflow worker pod contains
                                     both the airflow-worker container and the
                                     log-rotate container.
        :param shipyard_conf: Location of shipyard.conf
        :param start_time: Time when Operator gets executed
        :param sub_dag_name: Child Dag
        :param xcom_push: xcom usage

        """

        super(UcpBaseOperator, self).__init__(*args, **kwargs)
        self.continue_processing = True
        self.main_dag_name = main_dag_name
        self.pod_selector_pattern = pod_selector_pattern or []
        self.shipyard_conf = shipyard_conf
        self.start_time = datetime.now()
        self.sub_dag_name = sub_dag_name
        self.xcom_push_flag = xcom_push

    def execute(self, context):

        # Execute UCP base function
        self.ucp_base(context)

        # Execute base function
        self.run_base(context)

        if self.continue_processing:
            # Execute child function
            self.do_execute()

    def ucp_base(self, context):

        LOG.info("Running UCP Base Operator...")

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variable
        self.ucp_namespace = config.get('k8s_logs', 'ucp_namespace')

        # Define task_instance
        self.task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, self.task_instance)
        self.action_info = self.xcom_puller.get_action_info()
        self.dc = self.xcom_puller.get_deployment_configuration()
        self.revision_id = self.action_info['committed_rev_id']

    def get_k8s_logs(self):
        """Retrieve Kubernetes pod/container logs specified by an opererator

        This method is "best effort" and should not prevent the progress of
        the workflow processing
        """
        if self.pod_selector_pattern:
            for selector in self.pod_selector_pattern:
                # Get difference in current time and time when the
                # operator was first executed (in seconds)
                t_diff = (datetime.now() - self.start_time).total_seconds()

                # Note that we will end up with a floating number for
                # 't_diff' and will need to round it up to the nearest
                # integer
                t_diff_int = int(math.ceil(t_diff))

                try:
                    get_pod_logs(selector['pod_pattern'], self.ucp_namespace,
                                 selector['container'], t_diff_int)

                except K8sLoggingException as e:
                    LOG.error(e)

        else:
            LOG.debug("There are no pod logs specified to retrieve")
Example #20
class DeckhandBaseOperator(BaseOperator):

    """Deckhand Base Operator

    All deckhand related workflow operators will use the deckhand
    base operator as the parent and inherit attributes and methods
    from this class

    """

    @apply_defaults
    def __init__(self,
                 committed_ver=None,
                 deckhandclient=None,
                 deckhand_client_read_timeout=None,
                 deckhand_svc_endpoint=None,
                 deckhand_svc_type='deckhand',
                 main_dag_name=None,
                 revision_id=None,
                 shipyard_conf=None,
                 sub_dag_name=None,
                 svc_session=None,
                 svc_token=None,
                 validation_read_timeout=None,
                 xcom_push=True,
                 *args, **kwargs):
        """Initialization of DeckhandBaseOperator object.

        :param committed_ver: Last committed version
        :param deckhandclient: An instance of deckhand client
        :param deckhand_client_read_timeout: Deckhand client read timeout
        :param deckhand_svc_endpoint: Deckhand Service Endpoint
        :param deckhand_svc_type: Deckhand Service Type
        :param main_dag_name: Parent Dag
        :param revision_id: Target revision for workflow
        :param shipyard_conf: Path of shipyard.conf
        :param sub_dag_name: Child Dag
        :param svc_session: Keystone Session
        :param svc_token: Keystone Token
        :param validation_read_timeout: Deckhand validation timeout
        :param xcom_push: xcom usage

        """

        super(DeckhandBaseOperator, self).__init__(*args, **kwargs)
        self.committed_ver = committed_ver
        self.deckhandclient = deckhandclient
        self.deckhand_client_read_timeout = deckhand_client_read_timeout
        self.deckhand_svc_endpoint = deckhand_svc_endpoint
        self.deckhand_svc_type = deckhand_svc_type
        self.main_dag_name = main_dag_name
        self.revision_id = revision_id
        self.shipyard_conf = shipyard_conf
        self.sub_dag_name = sub_dag_name
        self.svc_session = svc_session
        self.svc_token = svc_token
        self.validation_read_timeout = validation_read_timeout
        self.xcom_push_flag = xcom_push

    def execute(self, context):

        # Execute deckhand base function
        self.deckhand_base(context)

        # Execute child function
        self.do_execute()

        # Push last committed version to xcom for the
        # 'deckhand_get_design_version' subdag
        if self.sub_dag_name == 'deckhand_get_design_version':
            return self.committed_ver

    @shipyard_service_token
    def deckhand_base(self, context):

        # Read and parse shipyard.conf
        config = configparser.ConfigParser()
        config.read(self.shipyard_conf)

        # Initialize variables
        self.deckhand_client_read_timeout = int(config.get(
            'requests_config', 'deckhand_client_read_timeout'))

        self.validation_read_timeout = int(config.get(
            'requests_config', 'validation_read_timeout'))

        # Define task_instance
        task_instance = context['task_instance']

        # Set up and retrieve values from xcom
        self.xcom_puller = XcomPuller(self.main_dag_name, task_instance)
        self.action_info = self.xcom_puller.get_action_info()

        # Logs uuid of Shipyard action
        logging.info("Executing Shipyard Action %s",
                     self.action_info['id'])

        # Retrieve Endpoint Information
        self.deckhand_svc_endpoint = ucp_service_endpoint(
            self, svc_type=self.deckhand_svc_type)

        logging.info("Deckhand endpoint is %s",
                     self.deckhand_svc_endpoint)

        # Set up DeckHand Client
        logging.info("Setting up DeckHand Client...")

        # NOTE: The communication between the Airflow workers
        # and Deckhand happens via the 'internal' endpoint.
        self.deckhandclient = deckhand_client.Client(
            session=self.svc_session, endpoint_type='internal')

        if not self.deckhandclient:
            raise AirflowException('Failed to set up deckhand client!')

        # Retrieve 'revision_id' from xcom for tasks other than
        # 'deckhand_get_design_version'
        #
        # NOTE: In the case of 'deploy_site', the dag_id will
        # be 'deploy_site.deckhand_get_design_version' for the
        # 'deckhand_get_design_version' task. We need to extract
        # the xcom value from it in order to get the value of the
        # last committed revision ID
        if self.task_id != 'deckhand_get_design_version':

            # Retrieve 'revision_id' from xcom
            self.revision_id = self.xcom_puller.get_design_version()

            if self.revision_id:
                logging.info("Revision ID is %d", self.revision_id)
            else:
                raise AirflowException('Failed to retrieve Revision ID!')
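
Finally, a hypothetical child operator sketched to show how this base class is meant to be extended: deckhand_base prepares self.deckhandclient, self.deckhand_svc_endpoint and self.revision_id before do_execute runs.

import logging


class DeckhandExampleOperator(DeckhandBaseOperator):
    # Invented for illustration; real workflow operators follow the same
    # pattern and implement do_execute only.

    def do_execute(self):
        logging.info("Would read rendered documents for revision %s "
                     "from %s using the prepared Deckhand client",
                     self.revision_id, self.deckhand_svc_endpoint)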