Example no. 1
0
    def create_infrastructure_impl(self, infrastructure_id, k8s):
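        """
        Create the storage, networks and pods described in the request, then
        record any requested output mappings in a ConfigMap. Failures are
        reported to the infrastructure messaging service as failed tasks.
        """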
        try:
            logger.info('storage=' + str(k8s.get('storage')))

            for storage_name, storage in k8s.get('storage', {}).items():
                storageSize = storage.get('size', None)
                storageClassName = storage.get('storageClassName', None)
                properties = {}
                if storageClassName == "hostpath":
                    properties['hostpath'] = storage.get('hostpath', None)

                self.create_storage(storage_name, storageSize,
                                    storageClassName, infrastructure_id,
                                    properties)

            for _, network in k8s.get('networks', {}).items():
                network_name = network.get('name', None)
                bridge = network.get('bridge', None)
                subnet = network.get('subnet', None)
                range_start = network.get('range_start', None)
                range_end = network.get('range_end', None)
                self.create_network(infrastructure_id, network_name, bridge,
                                    subnet, range_start, range_end)

            # TODO mapping storageClassName to pods - just have one storage class?
            for pod in k8s.get('pods', []):
                pod_name = pod.get('name', None)
                image = pod.get('image', None)
                container_port = pod.get('container_port', None)
                # storage_name, storageClassName, storageSize
                storage = pod.get('storage', [])
                networks = pod.get('network', [])
                logger.info('pod_name=' + str(pod_name))
                self.create_pod(pod_name, image, container_port,
                                infrastructure_id, storage, networks)

            # the outputs ConfigMap is built against the last pod defined above;
            # guard against a request with no pods (pod_name would be unset)
            if k8s.get('pods', []):
                self.create_config_map_for_outputs(pod_name, infrastructure_id,
                                                   k8s.get('outputs', {}))
        except K8sApiException as e:
            if e.status == 409:
                logger.error('K8s API conflict (409): ' + str(e))
                self.inf_messaging_service.send_infrastructure_task(
                    InfrastructureTask(
                        infrastructure_id, infrastructure_id, STATUS_FAILED,
                        FailureDetails(FAILURE_CODE_RESOURCE_ALREADY_EXISTS,
                                       "Resource already exists"), {}))
            else:
                logger.error('K8s API error: ' + str(e))
                self.inf_messaging_service.send_infrastructure_task(
                    InfrastructureTask(
                        infrastructure_id, infrastructure_id, STATUS_FAILED,
                        FailureDetails(FAILURE_CODE_INTERNAL_ERROR, str(e)),
                        {}))
        except Exception as e:
            logger.error('Unexpected exception: ' + str(e))
            self.inf_messaging_service.send_infrastructure_task(
                InfrastructureTask(
                    infrastructure_id, infrastructure_id, STATUS_FAILED,
                    FailureDetails(FAILURE_CODE_INTERNAL_ERROR, str(e)), {}))
Example no. 2
0
    def handle_request(self, request):
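        """
        Validate a lifecycle request, run the corresponding playbook via the
        Ansible client and send the result to the lifecycle responses queue.
        """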
        try:
            if request is not None:
                if request.get('logging_context', None) is not None:
                    logging_context.set_from_dict(request['logging_context'])

                if 'request_id' not in request:
                    self.messaging_service.send_lifecycle_execution(
                        LifecycleExecution(
                            None, STATUS_FAILED,
                            FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                                           "Request must have a request_id"),
                            {}))
                    return
                if 'lifecycle_name' not in request:
                    self.messaging_service.send_lifecycle_execution(
                        LifecycleExecution(
                            request['request_id'], STATUS_FAILED,
                            FailureDetails(
                                FAILURE_CODE_INTERNAL_ERROR,
                                "Request must have a lifecycle_name"), {}))
                    return
                if 'driver_files' not in request:
                    self.messaging_service.send_lifecycle_execution(
                        LifecycleExecution(
                            request['request_id'], STATUS_FAILED,
                            FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                                           "Request must have a driver_files"),
                            {}))
                    return

                # run the playbook and send the response to the response queue
                logger.debug(
                    'Ansible worker running request {0}'.format(request))
                result = self.ansible_client.run_lifecycle_playbook(request)
                if result is not None:
                    logger.debug(
                        'Ansible worker finished for request {0}: {1}'.format(
                            request, result))
                    self.messaging_service.send_lifecycle_execution(result)
                else:
                    logger.warning(
                        "Empty response from Ansible worker for request {0}".
                        format(request))
            else:
                logger.warning('Null lifecycle request from request queue')
        except Exception as e:
            logger.error('Unexpected exception {0}'.format(e))
            traceback.print_exc(file=sys.stderr)
            # don't want the worker to die without knowing the cause, so catch all exceptions
            if request is not None:
                self.messaging_service.send_lifecycle_execution(
                    LifecycleExecution(
                        request.get('request_id'), STATUS_FAILED,
                        FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                                       "Unexpected exception: {0}".format(e)),
                        {}))
        finally:
            # clean up zombie processes (Ansible can leave these behind);
            # calling active_children() joins any finished children as a side effect
            for p in active_children():
                logger.debug("child process {0} still active".format(p.name))
Example no. 3
0
    def get_infrastructure(self, infrastructure_id, request_type):
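        """
        Aggregate the status of all pods and PVCs labelled with the given
        infrastructure_id into a single InfrastructureTask response.
        """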
        outputs = {}

        # collect the status of every pod and PVC labelled with this
        # infrastructure_id (extend, not append, keeps statuses a flat list;
        # the list_* calls return list objects whose .items holds the resources)
        statuses = []
        statuses.extend(
            map(
                lambda pod: self.__build_pod_status(
                    request_type, pod, outputs),
                self.coreV1Api().list_namespaced_pod(
                    namespace=self.namespace(),
                    label_selector='infrastructure_id={}'.format(
                        infrastructure_id)).items))
        statuses.extend(
            map(
                lambda pvc: self.__build_pvc_status(
                    request_type, pvc, outputs),
                self.coreV1Api().list_namespaced_persistent_volume_claim(
                    namespace=self.namespace(),
                    label_selector='infrastructure_id={}'.format(
                        infrastructure_id)).items))

        failure_details = None
        status = STATUS_COMPLETE

        if request_type == 'CREATE':
            failed = list(
                filter(lambda x: x['status'] == STATUS_FAILED, statuses))
            if len(failed) > 0:
                status = STATUS_FAILED
                failure_details = FailureDetails(
                    FAILURE_CODE_INFRASTRUCTURE_ERROR,
                    failed[0]['status_reason'])

            in_progress = list(
                filter(lambda x: x['status'] == STATUS_IN_PROGRESS, statuses))
            if len(in_progress) > 0:
                status = STATUS_IN_PROGRESS

            return InfrastructureTask(infrastructure_id, infrastructure_id,
                                      status, failure_details, outputs)
        elif request_type == 'DELETE':
            failed = list(
                filter(lambda x: x['status'] == STATUS_FAILED, statuses))
            in_progress = list(
                filter(lambda x: x['status'] == STATUS_IN_PROGRESS, statuses))
            if len(failed) > 0:
                status = STATUS_FAILED
                failure_details = FailureDetails(
                    FAILURE_CODE_INFRASTRUCTURE_ERROR,
                    failed[0]['status_reason'])
            elif len(in_progress) > 0 or len(statuses) > 0:
                status = STATUS_IN_PROGRESS

            return InfrastructureTask(infrastructure_id, infrastructure_id,
                                      status, failure_details, outputs)
        else:
            raise ValueError("Invalud request_type {0}".format(request_type))
Example no. 4
0
 def v2_runner_on_failed(self, result, *args, **kwargs):
     """
     Called when a task fails
     Note: even when a loop is used (so v2_runner_item_on_failed/v2_runner_item_on_ok is called for each item) this function is called at the end, when all items have been attempted but one has failed
     """
     logger.debug(
         "v2_runner_on_failed: task={0}, result={1}, task_fields={2}".
         format(result._task, result._result, result._task_fields))
     # TODO: handle ignore_errors?
     self.failed_task = result._task.get_name()
     msg = result._result.get('msg', '')
     module_stderr = result._result.get('module_stderr', '')
     if 'Timeout' in msg and 'waiting for privilege escalation prompt' in msg:
         logger.info('Failure to be treated as unreachable: task ' +
                     str(self.failed_task) + ' failed: ' +
                     str(result._result))
         self.__handle_unreachable(result)
     elif module_stderr.startswith('ssh:') and 'Host is unreachable' in module_stderr:
         logger.info('Failure to be treated as unreachable: task ' +
                     str(self.failed_task) + ' failed: ' +
                     str(result._result))
         self.__handle_unreachable(result)
     else:
         self.host_failed = True
         self.failure_reason = 'task ' + str(
             self.failed_task) + ' failed: ' + str(result._result)
         self.host_failed_log.append(
             dict(task=self.failed_task, result=result._result))
         self.failure_details = FailureDetails(
             FAILURE_CODE_INFRASTRUCTURE_ERROR, self.failure_reason)
         self.playbook_failed = True
         self._log_event_for_failed_task(result)
Example no. 5
 def __build_execution_response(self, stack, request_id):
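     """
     Build a LifecycleExecution response for the given stack, deriving the
     status (and any failure details) from its stack_status.
     """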
     request_type, stack_id, operation_id = self.__split_request_id(
         request_id)
     stack_status = stack.get('stack_status', None)
     failure_details = None
     if request_type == CREATE_REQUEST_PREFIX:
         status = self.__determine_create_status(request_id, stack_id,
                                                 stack_status)
     else:
         status = self.__determine_delete_status(request_id, stack_id,
                                                 stack_status)
     if status == STATUS_FAILED:
         description = stack.get('stack_status_reason', None)
         failure_details = FailureDetails(FAILURE_CODE_INFRASTRUCTURE_ERROR,
                                          description)
     outputs = None
     if request_type == CREATE_REQUEST_PREFIX:
         outputs_from_stack = stack.get('outputs', [])
         outputs = self.__translate_outputs_to_values_dict(
             outputs_from_stack)
     return LifecycleExecution(request_id,
                               status,
                               failure_details=failure_details,
                               outputs=outputs)
Example no. 6
 def v2_runner_on_failed(self, result, *args, **kwargs):
     """
     Called when an Ansible task fails
     """
     logger.debug("v2_runner_on_failed {0} {1} {2}".format(
         result._task, result._result, result._task_fields))
     self.failed_task = result._task.get_name()
     msg = result._result.get('msg', '')
     module_stderr = result._result.get('module_stderr', '')
     if 'Timeout' in msg and 'waiting for privilege escalation prompt' in msg:
         logger.debug('Failure to be treated as unreachable: task ' +
                      str(self.failed_task) + ' failed: ' +
                      str(result._result))
         self.__handle_unreachable(result)
     elif module_stderr.startswith('ssh:') and 'Host is unreachable' in module_stderr:
         logger.debug('Failure to be treated as unreachable: task ' +
                      str(self.failed_task) + ' failed: ' +
                      str(result._result))
         self.__handle_unreachable(result)
     else:
         self.host_failed = True
         self.failure_reason = 'task ' + str(
             self.failed_task) + ' failed: ' + str(result._result)
         self.host_failed_log.append(
             dict(task=self.failed_task, result=result._result))
         self.failure_details = FailureDetails(
             FAILURE_CODE_INFRASTRUCTURE_ERROR, self.failure_reason)
         self.playbook_failed = True
Example no. 7
 def __build_infrastructure_response(self, stack):
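     """
     Map an OpenStack stack_status onto a driver status and build the
     corresponding InfrastructureTask, including outputs when the last
     operation on the stack was a create.
     """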
     infrastructure_id = stack.get('id')
     stack_status = stack.get('stack_status', None)
     failure_details = None
     if stack_status in [OS_STACK_STATUS_CREATE_IN_PROGRESS, OS_STACK_STATUS_DELETE_IN_PROGRESS]:
         logger.debug('Stack %s has stack_status %s, setting status in response to %s', infrastructure_id, stack_status, STATUS_IN_PROGRESS)
         status = STATUS_IN_PROGRESS
     elif stack_status in [OS_STACK_STATUS_CREATE_COMPLETE, OS_STACK_STATUS_DELETE_COMPLETE]:
         logger.debug('Stack %s has stack_status %s, setting status in response to %s', infrastructure_id, stack_status, STATUS_COMPLETE)
         status = STATUS_COMPLETE
     elif stack_status in [OS_STACK_STATUS_CREATE_FAILED, OS_STACK_STATUS_DELETE_FAILED]:
         logger.debug('Stack %s has stack_status %s, setting status in response to %s', infrastructure_id, stack_status, STATUS_FAILED)
         status = STATUS_FAILED
         description = stack.get('stack_status_reason', None)
         failure_details = FailureDetails(FAILURE_CODE_INFRASTRUCTURE_ERROR, description)
     else:
         logger.debug('Stack %s has stack_status %s, setting status in response to %s', infrastructure_id, stack_status, STATUS_UNKNOWN)
         status = STATUS_UNKNOWN
     is_create = True
     if stack_status in [OS_STACK_STATUS_DELETE_IN_PROGRESS, OS_STACK_STATUS_DELETE_COMPLETE, OS_STACK_STATUS_DELETE_FAILED]:
         is_create = False
     outputs = None
     if is_create:
         logger.debug('Stack %s last process is a create', infrastructure_id)
         outputs_from_stack = stack.get('outputs', [])
         outputs = self.__translate_outputs_to_values_dict(outputs_from_stack)
     return InfrastructureTask(infrastructure_id, infrastructure_id, status, failure_details, outputs)
Example no. 8
    def test_run_lifecycle_missing_lifecycle_name(self):
        # this is needed to ensure logging output appears in test context - see https://stackoverflow.com/questions/7472863/pydev-unittesting-how-to-capture-text-logged-to-a-logging-logger-in-captured-o
        stream_handler.stream = sys.stdout

        request_id = uuid.uuid4().hex

        handler = AnsibleRequestHandler(self.mock_messaging_service,
                                        self.mock_ansible_client)
        handler.handle_request({
            'request_id': request_id,
            'driver_files': DirectoryTree(self.tmp_workspace),
            'system_properties': PropValueMap({}),
            'resource_properties': PropValueMap({}),
            'deployment_location': PropValueMap({})
        })
        self.check_response_only(
            LifecycleExecution(
                request_id, STATUS_FAILED,
                FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                               "Request must have a lifecycle_name"), {}))
Example no. 9
0
    def v2_runner_on_ok(self, result, *args, **kwargs):
        """
        Called when task execution completes (called for each host the task executes against)
        Note: even when a loop is used (so v2_runner_item_on_ok is called for each successful item) this function is called at the end, when all items have succeeded
        """
        logger.debug('v2_runner_on_ok: {0}'.format(result))

        props = []
        if 'results' in result._result.keys():
            self.facts = result._result['results']
            props = [
                item['ansible_facts'] for item in self.facts
                if 'ansible_facts' in item
            ]
        else:
            self.facts = result._result
            if 'ansible_facts' in self.facts:
                props = [self.facts['ansible_facts']]

        for prop in props:
            for key, value in prop.items():
                if key.startswith(self.ansible_properties.output_prop_prefix):
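                    # strip the configured prefix to get the output property
                    # name, e.g. (assuming a prefix of 'output__') a fact named
                    # 'output__ip' becomes the output property 'ip'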
                    output_facts = {
                        key[len(self.ansible_properties.output_prop_prefix):]:
                        value
                    }
                    logger.debug('output props = {0}'.format(output_facts))
                    self.properties.update(output_facts)
                elif key == 'associated_topology':
                    try:
                        # log the raw fact value before parsing it
                        logger.info('associated_topology = {0}'.format(value))
                        self.associated_topology = AssociatedTopology.from_dict(
                            value)
                    except ValueError as ve:
                        self.failure_reason = f'An error has occurred while parsing the ansible fact \'{key}\'. {ve}'
                        self.failure_details = FailureDetails(
                            FAILURE_CODE_INFRASTRUCTURE_ERROR,
                            self.failure_reason)
                        self.playbook_failed = True
                    except Exception as e:
                        self.failure_reason = f'An internal error has occurred. {e}'
                        self.failure_details = FailureDetails(
                            FAILURE_CODE_INFRASTRUCTURE_ERROR,
                            self.failure_reason)
                        self.playbook_failed = True
        self._log_event_for_ok_task(result)
Example no. 10
0
 def job_handler(self, job_definition):
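     """
     Check the status of a lifecycle execution request. Returns True when the
     job is finished with (a response was sent or the job was discarded) and
     False when the job should be re-queued for another check.
     """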
     if 'request_id' not in job_definition or job_definition[
             'request_id'] is None:
         logger.warning(
             'Job with {0} job type is missing request_id. This job has been discarded'
             .format(LIFECYCLE_EXECUTION_MONITOR_JOB_TYPE))
         return True
     if 'deployment_location' not in job_definition or job_definition[
             'deployment_location'] is None:
         logger.warning(
             'Job with {0} job type is missing deployment_location. This job has been discarded'
             .format(LIFECYCLE_EXECUTION_MONITOR_JOB_TYPE))
         return True
     request_id = job_definition['request_id']
     deployment_location = job_definition['deployment_location']
     try:
         lifecycle_execution_task = self.handler.get_lifecycle_execution(
             request_id, deployment_location)
      except RequestNotFoundError:
         logger.debug(
             'Request with ID {0} not found, the request will no longer be monitored'
             .format(request_id))
         return True
     except TemporaryResourceDriverError as e:
         logger.exception(
             'Temporary error occurred checking status of request with ID {0}. The job will be re-queued: {1}'
             .format(request_id, str(e)))
         return False
     except Exception as e:
         logger.exception(
             'Unexpected error occurred checking status of request with ID {0}. A failure response will be posted and the job will NOT be re-queued: {1}'
             .format(request_id, str(e)))
         lifecycle_execution_task = LifecycleExecution(
             request_id, STATUS_FAILED,
             FailureDetails(FAILURE_CODE_INTERNAL_ERROR, str(e)))
         self.lifecycle_messaging_service.send_lifecycle_execution(
             lifecycle_execution_task)
         return True
     status = lifecycle_execution_task.status
     if status in [STATUS_COMPLETE, STATUS_FAILED]:
         self.lifecycle_messaging_service.send_lifecycle_execution(
             lifecycle_execution_task)
         if hasattr(self.handler, 'post_lifecycle_response'):
             try:
                  logger.debug(
                      'Calling post_lifecycle_response for request with ID: {0}'
                      .format(request_id))
                 self.handler.post_lifecycle_response(
                     request_id, deployment_location)
             except Exception as e:
                 logger.exception(
                     'Unexpected error occurred on post_lifecycle_response for request with ID {0}. This error has no impact on the response: {1}'
                     .format(request_id, str(e)))
         return True
     return False
Example no. 11
 def __handle_unreachable(self, result):
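     """
     Record the failed task as unreachable and mark the playbook run as
     failed with a FAILURE_CODE_RESOURCE_NOT_FOUND failure.
     """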
     # TODO do not overwrite if already set
     self.failed_task = result._task.get_name()
     self.host_unreachable_log.append(
         dict(task=self.failed_task, result=result._result))
     self.host_unreachable = True
     self.failure_reason = 'Resource unreachable (task ' + str(
         self.failed_task) + ' failed: ' + str(result._result) + ')'
     self.failure_details = FailureDetails(FAILURE_CODE_RESOURCE_NOT_FOUND,
                                           self.failure_reason)
     self.playbook_failed = True
Example no. 12
0
    def pod_watcher_worker(self):
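        """
        Watch pod events across all namespaces and, for pods labelled with an
        infrastructure_id, send an InfrastructureTask response when the pod
        reaches a terminal (complete or failed) status.
        """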
        try:
            logger.info('Monitoring pods')

            # TODO loop until close condition is set
            last_seen_version = 0
            while True:
                # poll forever (timeout == 0)
                for pod_event in self.watcher.stream(
                        self.coreV1Api().list_pod_for_all_namespaces,
                        resource_version=last_seen_version,
                        timeout_seconds=0):
                    event_type = pod_event['type']
                    pod = pod_event['object']
                    metadata = pod.metadata

                    # track where we are up to in the pod events stream so a
                    # restarted watch resumes from here rather than re-listing
                    last_seen_version = metadata.resource_version

                    pod_name = metadata.name
                    labels = metadata.labels
                    infrastructure_id = labels.get('infrastructure_id', None)
                    if infrastructure_id is not None:
                        logging_context.set_from_dict(labels)
                        try:
                            logger.debug('Got pod event {0}'.format(pod_event))

                            outputs = {}
                            phase = pod.status.phase
                            podStatus = self.__build_pod_status(
                                event_type, pod, outputs)
                            request_type = 'CREATE'
                            failure_details = None
                            outputs = {"host": pod.metadata.name}

                            if phase is None:
                                status = STATUS_UNKNOWN
                            elif phase in ['Pending']:
                                container_statuses = pod.status.container_statuses
                                if container_statuses is not None and len(
                                        container_statuses) > 0:
                                    waiting = container_statuses[
                                        0].state.waiting
                                    if (waiting is not None):
                                        if (waiting.reason in [
                                                'ErrImagePull',
                                                'ImagePullBackOff'
                                        ]):
                                            status = STATUS_FAILED
                                            failure_details = FailureDetails(
                                                FAILURE_CODE_INFRASTRUCTURE_ERROR,
                                                'ErrImagePull')
                                        else:
                                            status = STATUS_IN_PROGRESS
                                    else:
                                        status = STATUS_IN_PROGRESS
                                else:
                                    status = STATUS_IN_PROGRESS
                            elif phase in ['Running']:
                                status = STATUS_COMPLETE
                            elif phase in ['Failed']:
                                status = STATUS_FAILED
                                failure_details = FailureDetails(
                                    FAILURE_CODE_INFRASTRUCTURE_ERROR,
                                    podStatus.status_reason)
                            else:
                                status = STATUS_UNKNOWN

                            if status in [STATUS_COMPLETE, STATUS_FAILED]:
                                if status == STATUS_COMPLETE:
                                    try:
                                        # try to find the ConfigMap that contains information on output property mappings
                                        cm = self.coreV1Api(
                                        ).read_namespaced_config_map(
                                            infrastructure_id,
                                            self.namespace())
                                        logger.info(
                                            "Got ConfigMap {0} for infrastructure_id {1}"
                                            .format(str(cm),
                                                    infrastructure_id))
                                        if cm is not None:
                                            for output_prop_name, k8s_key in cm.data.items():
                                                logger.info("Output: {0}={1}".format(output_prop_name, k8s_key))
                                                if not k8s_key.startswith('network.'):
                                                    # only network-backed outputs are mapped here
                                                    continue
                                                k8s_prop_name = k8s_key[len('network.'):]
                                                logger.info("k8s_prop_name: {0}".format(k8s_prop_name))

                                                annotations = pod.metadata.annotations
                                                networks_status_str = annotations.get(
                                                    'k8s.v1.cni.cncf.io/networks-status', None)
                                                logger.info('networks_status_str: {0}'.format(networks_status_str))
                                                if networks_status_str is not None:
                                                    networks_status = json.loads(networks_status_str)
                                                    for network_status in networks_status:
                                                        net_name = network_status.get('name', None)
                                                        net_ips = network_status.get('ips', [])
                                                        logger.info('net_name {0}, net_ips {1}'.format(net_name, net_ips))
                                                        if net_name == k8s_prop_name and len(net_ips) > 0:
                                                            outputs[output_prop_name] = net_ips[0]
                                                else:
                                                    logger.info('network status not found for output property {0}'.format(output_prop_name))

                                    except K8sApiException as e:
                                        if e.status == 404:
                                            # a missing ConfigMap just means no output mappings were requested
                                            logger.info(
                                                "Unable to find cm for infrastructure id {0}"
                                                .format(infrastructure_id))
                                        else:
                                            logger.exception(
                                                "Error reading ConfigMap for infrastructure id {0}"
                                                .format(infrastructure_id))

                                inf_task = InfrastructureTask(
                                    infrastructure_id, infrastructure_id,
                                    status, failure_details, outputs)
                                logger.info(
                                    'Sending infrastructure response {0}'.
                                    format(str(inf_task)))

                                self.inf_messaging_service.send_infrastructure_task(
                                    inf_task)
                        finally:
                            logging_context.clear()
        except Exception:
            logger.exception(
                "Unexpected exception watching pods, re-initializing")
            # restart the watch; each restart adds a stack frame, so persistent
            # failures would eventually exhaust the stack
            self.pod_watcher_worker()
Example no. 13
    def run_lifecycle_playbook(self, request):
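        """
        Run the playbook matching the requested lifecycle, retrying (up to
        max_unreachable_retries) while the target host is unreachable, and
        return the resulting LifecycleExecution.
        """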
        driver_files = request['driver_files']
        key_property_processor = None

        try:
            request_id = request['request_id']
            lifecycle = request['lifecycle_name']
            properties = request['resource_properties']
            system_properties = request['system_properties']
            deployment_location = request['deployment_location']
            if not isinstance(deployment_location, dict):
                return LifecycleExecution(
                    request_id, STATUS_FAILED,
                    FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                                   "Deployment Location must be an object"),
                    {})
            dl_properties = PropValueMap(
                deployment_location.get('properties', {}))

            config_path = driver_files.get_directory_tree('config')
            scripts_path = driver_files.get_directory_tree('scripts')

            key_property_processor = KeyPropertyProcessor(
                properties, system_properties, dl_properties)

            playbook_path = get_lifecycle_playbook_path(
                scripts_path, lifecycle)
            if playbook_path is not None:
                if not os.path.exists(playbook_path):
                    return LifecycleExecution(
                        request_id, STATUS_FAILED,
                        FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                                       "Playbook path does not exist"), {})

                if deployment_location.get('type') == 'Kubernetes':
                    dl_properties['kubeconfig_path'] = self.create_kube_config(
                        deployment_location)
                    connection_type = "k8s"
                    inventory_path = config_path.get_file_path(INVENTORY_K8S)
                else:
                    connection_type = "ssh"
                    inventory_path = config_path.get_file_path(INVENTORY)

                # process key properties by writing them out to a temporary file and adding an
                # entry to the property dictionary that maps the "[key_name].path" to the key file path
                key_property_processor.process_key_properties()

                logger.debug('config_path = ' + config_path.get_path())
                logger.debug('scripts_path = ' + scripts_path.get_path())
                logger.debug("playbook_path=" + playbook_path)
                logger.debug("inventory_path=" + inventory_path)

                all_properties = {
                    'properties': properties,
                    'system_properties': system_properties,
                    'dl_properties': dl_properties
                }

                process_templates(config_path, all_properties)

                # always retry on unreachable
                num_retries = self.ansible_properties.max_unreachable_retries

                for i in range(0, num_retries):
                    if i > 0:
                        logger.debug(
                            'Playbook {0}, unreachable retry attempt {1}/{2}'.
                            format(playbook_path, i + 1, num_retries))
                    start_time = datetime.now()
                    ret = self.run_playbook(request_id, connection_type,
                                            inventory_path, playbook_path,
                                            lifecycle, all_properties)
                    if not ret.host_unreachable:
                        break
                    end_time = datetime.now()
                    if self.ansible_properties.unreachable_sleep_seconds > 0:
                        # Factor in that the playbook may have taken some time to determine it was unreachable
                        # by using the unreachable_sleep_seconds value as a minimum amount of time for the delay
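                        # e.g. (illustrative values) unreachable_sleep_seconds=30 and a
                        # playbook run lasting 12s give a delay of max(0, 30 - 12) = 18s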
                        delta = end_time - start_time
                        retry_seconds = max(
                            0,
                            self.ansible_properties.unreachable_sleep_seconds -
                            int(delta.total_seconds()))
                        time.sleep(retry_seconds)

                return ret.get_result()
            else:
                msg = "No playbook to run at {0} for lifecycle {1} for request {2}".format(
                    playbook_path, lifecycle, request_id)
                logger.debug(msg)
                return LifecycleExecution(
                    request_id, STATUS_FAILED,
                    FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {})
        except InvalidRequestException as ire:
            return LifecycleExecution(
                request_id, STATUS_FAILED,
                FailureDetails(FAILURE_CODE_INTERNAL_ERROR, ire.msg), {})
        except Exception as e:
            logger.exception("Unexpected exception running playbook")
            return LifecycleExecution(
                request_id, STATUS_FAILED,
                FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                               "Unexpected exception: {0}".format(e)), {})
        finally:
            if key_property_processor is not None:
                key_property_processor.clear_key_files()

            keep_files = request.get('keep_files', False)
            if not keep_files and driver_files is not None:
                try:
                    logger.debug(
                        'Attempting to remove lifecycle scripts at {0}'.format(
                            driver_files.root_path))
                    driver_files.remove_all()
                except Exception as e:
                    logger.exception(
                        'Encountered an error whilst trying to clear out lifecycle scripts directory {0}: {1}'
                        .format(driver_files.root_path, str(e)))
Example no. 14
0
    def run_lifecycle_playbook(self, request):
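        """
        Run the playbook matching the requested lifecycle against the resolved
        deployment location, retrying (up to max_unreachable_retries) while the
        target is unreachable, and return the resulting LifecycleExecution.
        """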
        driver_files = request['driver_files']
        key_property_processor = None
        location = None

        try:
            request_id = request['request_id']
            lifecycle = request['lifecycle_name']
            resource_properties = request.get('resource_properties', {})
            system_properties = request.get('system_properties', {})
            request_properties = request.get('request_properties', {})
            associated_topology = request.get('associated_topology', None)

            location = DeploymentLocation.from_request(request)

            config_path = driver_files.get_directory_tree('config')
            scripts_path = driver_files.get_directory_tree('scripts')

            key_property_processor = KeyPropertyProcessor(
                resource_properties, system_properties, location.properties())

            playbook_path = get_lifecycle_playbook_path(
                scripts_path, lifecycle)
            if playbook_path is not None:
                if not os.path.exists(playbook_path):
                    return LifecycleExecution(
                        request_id, STATUS_FAILED,
                        FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                                       "Playbook path does not exist"), {})

                inventory = Inventory(driver_files,
                                      location.infrastructure_type)

                # process key properties by writing them out to a temporary file and adding an
                # entry to the property dictionary that maps the "[key_name].path" to the key file path
                key_property_processor.process_key_properties()

                logger.debug(
                    f'Handling request {request_id} with config_path: {config_path.get_path()} driver files path: {scripts_path.get_path()} resource properties: {resource_properties} system properties {system_properties} request properties {request_properties}'
                )

                all_properties = self.render_context_service.build(
                    system_properties, resource_properties, request_properties,
                    location.deployment_location(), associated_topology)

                process_templates(config_path, self.templating, all_properties)

                # always retry on unreachable
                num_retries = self.ansible_properties.max_unreachable_retries

                for i in range(0, num_retries):
                    if i > 0:
                        logger.debug(
                            'Playbook {0}, unreachable retry attempt {1}/{2}'.
                            format(playbook_path, i + 1, num_retries))
                    start_time = datetime.now()
                    ret = self.run_playbook(request_id,
                                            location.connection_type,
                                            inventory.get_inventory_path(),
                                            playbook_path, lifecycle,
                                            all_properties)
                    if not ret.host_unreachable:
                        break
                    end_time = datetime.now()
                    if self.ansible_properties.unreachable_sleep_seconds > 0:
                        # Factor in that the playbook may have taken some time to determine it was unreachable
                        # by using the unreachable_sleep_seconds value as a minimum amount of time for the delay
                        delta = end_time - start_time
                        retry_seconds = max(
                            0,
                            self.ansible_properties.unreachable_sleep_seconds -
                            int(delta.total_seconds()))
                        time.sleep(retry_seconds)

                return ret.get_result()
            else:
                msg = "No playbook to run at {0} for lifecycle {1} for request {2}".format(
                    playbook_path, lifecycle, request_id)
                logger.debug(msg)
                return LifecycleExecution(
                    request_id, STATUS_FAILED,
                    FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {})
        except InvalidRequestException as ire:
            return LifecycleExecution(
                request_id, STATUS_FAILED,
                FailureDetails(FAILURE_CODE_INTERNAL_ERROR, ire.msg), {})
        except Exception as e:
            logger.exception("Unexpected exception running playbook")
            return LifecycleExecution(
                request_id, STATUS_FAILED,
                FailureDetails(FAILURE_CODE_INTERNAL_ERROR,
                               "Unexpected exception: {0}".format(e)), {})
        finally:
            if location is not None:
                location.cleanup()

            if key_property_processor is not None:
                key_property_processor.clear_key_files()

            keep_files = request.get('keep_files', False)
            if not keep_files and driver_files is not None:
                try:
                    logger.debug(
                        'Attempting to remove lifecycle scripts at {0}'.format(
                            driver_files.root_path))
                    driver_files.remove_all()
                except Exception as e:
                    logger.exception(
                        'Encountered an error whilst trying to clear out lifecycle scripts directory {0}: {1}'
                        .format(driver_files.root_path, str(e)))
Example no. 15
0
    def handle_request(self, request):
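        """
        Validate a raw queue request and convert its fields to driver types
        before passing it on to the lifecycle request handler; a missing field
        results in a failed LifecycleExecution being sent instead.
        """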
        try:
            partition = request.partition
            offset = request.offset
            request_as_dict = request.as_new_dict()
            request_id = request_as_dict.get('request_id', None)

            if 'lifecycle_name' not in request_as_dict or request_as_dict['lifecycle_name'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing lifecycle_name.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return
            if 'driver_files' not in request_as_dict or request_as_dict['driver_files'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing driver_files.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return
            if 'system_properties' not in request_as_dict or request_as_dict['system_properties'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing system_properties.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return
            if 'resource_properties' not in request_as_dict or request_as_dict['resource_properties'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing resource_properties.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return
            if 'request_properties' not in request_as_dict or request_as_dict['request_properties'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing request_properties.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return
            if 'associated_topology' not in request_as_dict or request_as_dict['associated_topology'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing associated_topology.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return
            if 'deployment_location' not in request_as_dict or request_as_dict['deployment_location'] is None:
                msg = 'Lifecycle request for partition {0} offset {1} is missing deployment_location.'.format(partition, offset)
                logger.warning(msg)
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, msg), {}))
                return

            file_name = str(uuid.uuid4())
            request_as_dict['driver_files'] = self.driver_files_manager.build_tree(file_name, request_as_dict['driver_files'])
            request_as_dict['resource_properties'] = PropValueMap(request_as_dict['resource_properties'])
            request_as_dict['system_properties'] = PropValueMap(request_as_dict['system_properties'])
            request_as_dict['request_properties'] = PropValueMap(request_as_dict['request_properties'])
            request_as_dict['associated_topology'] = AssociatedTopology.from_dict(request_as_dict['associated_topology'])

            self.lifecycle_request_handler.handle_request(request_as_dict)
        except Exception as e:
            try:
                self.messaging_service.send_lifecycle_execution(LifecycleExecution(request.request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INTERNAL_ERROR, str(e)), {}))
            except Exception as e:
                # just log this and carry on
                logger.exception('Caught exception sending lifecycle response for driver request {0} for topic {1} : {2}'.format(request.request_id, self.request_queue_config.topic.name, str(e)))
Example no. 16
0
    def test_run_lifecycle_with_malformed_associated_topology_in_playbook(self):
        # configure so that we can see logging from the code under test
        stream_handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(stream_handler)
        try:
            request_id = uuid.uuid4().hex
            infrastructure_id_1 = uuid.uuid4().hex
            infrastructure_id_2 = uuid.uuid4().hex
            infrastructure_osp_type = 'Openstack'
            infrastructure_k8s_type = 'Kubernetes'

            properties = PropValueMap({
                'hello_world_private_ip': {
                    'value': '10.220.217.113',
                    'type': 'string'
                },
                'ansible_ssh_user': {
                    'value': 'accanto',
                    'type': 'string'
                },
                'ansible_ssh_pass': {
                    'value': 'accanto',
                    'type': 'string'
                },
                'ansible_become_pass': {
                    'value': 'accanto',
                    'type': 'string'
                }
            })
            system_properties = PropValueMap({})

            associated_topology = AssociatedTopology.from_dict({
                'apache1': {
                    'id': infrastructure_id_1,
                    'type': infrastructure_osp_type
                },
                'apache2': {
                    'id': infrastructure_id_2,
                    'type': infrastructure_k8s_type
                }
            })

            dst = self.__copy_directory_tree(str(pathlib.Path(__file__).parent.absolute()) + '/../../resources/ansible_with_malformed_associated_topology_in_playbook')

            resp = self.ansible_client.run_lifecycle_playbook({
                'lifecycle_name': 'adopt',
                'driver_files': DirectoryTree(dst),
                'system_properties': system_properties,
                'resource_properties': properties,
                'deployment_location': {
                    'name': 'winterfell',
                    'type': 'Kubernetes',
                    'properties': PropValueMap({})
                },
                'associated_topology': associated_topology,
                'keep_files': True,
                'request_id': request_id
            })

            self.assertLifecycleExecutionMatches(resp, LifecycleExecution(request_id, STATUS_FAILED, FailureDetails(FAILURE_CODE_INFRASTRUCTURE_ERROR, "task debug failed: {'msg': \"The task includes an option with an undefined variable. The error was: 'dict object' has no attribute 'wrong'"), {}))
            self.assertTrue(os.path.exists(dst))
        finally:
            logger.removeHandler(stream_handler)