class EMEntityMigration(BaseModularInput):
    """
    Entity Migration modular input.

    Converts each SAI em_entity into the common ITOA entity model and
    POSTs the results to the message bus endpoint given by ``publish_url``.
    """

    app = APP_NAME
    name = 'em_entity_migration'
    title = 'Splunk App for Infrastructure - Entity Migration'
    description = 'Entity Migration with conversion from SAI to ITSI entities'
    use_external_validation = False
    use_kvstore_checkpointer = False
    use_hec_event_writer = False
    # Validation rules mirror itsi_entity.py: a title may not contain quotes
    # or '='; a dimension name additionally may not start with '$' or
    # contain '.' or ','.
    regex_entity_title_invalid_chars = re.compile(r'[="\']+')
    regex_entity_dims_invalid_chars = re.compile(r'^\$|[=.,"\']+')

    def __init__(self):
        """
        Init modular input for entity migration
        """
        super(EMEntityMigration, self).__init__()
        # Wired up in do_additional_setup(); None until then.
        self.splunkd_messages_service = None

    def extra_arguments(self):
        """Declare the extra input-stanza arguments this modinput accepts."""
        return [
            {
                'name': 'log_level',
                'title': 'Log level',
                'description': 'Log level, default to WARNING',
                'required_on_create': False,
                'data_type': Argument.data_type_string
            },
            {
                'name': 'publish_url',
                'title': 'Publish URL',
                'description': 'The publish URL of the message bus',
                'required_on_create': True,
                'data_type': Argument.data_type_string
            },
        ]

    def do_additional_setup(self):
        """Configure logging, the splunkd messages service and the
        inputs.conf manager before the main loop runs."""
        # set log level
        log_level = self.inputs.get('job', {}).get('log_level', 'WARNING')
        logger.setLevel(log.parse_log_level(log_level))
        # set up message service
        self.splunkd_messages_service = Service(port=getDefault('port'),
                                                token=session['authtoken'],
                                                app=APP_NAME,
                                                owner='nobody').messages
        # set up conf file manager
        self.inputs_conf = conf_manager.ConfManager(
            session['authtoken'], APP_NAME,
            port=getDefault('port')).get_conf('inputs')

    def do_execute(self):
        """
        Main loop function, run every "interval" seconds
        :return: void
        """
        if self.is_migration_job_disabled():
            return
        try:
            # Only one node (the captain) should publish entities.
            if not em_common.modular_input_should_run(session['authtoken']):
                logger.info(
                    "em_entity_migration modinput will not run on non-captain node. exiting..."
                )
                return

            # use hard coded url if not found
            publish_url = self.inputs['job'].get('publish_url',
                                                 DEFAULT_PUBLISH_URL)
            logger.debug('publish_url set to %s', publish_url)

            if em_common.is_url_valid(session['authtoken'], publish_url):
                itoa_entities = self.prepare_itoa_entities()
                if not itoa_entities:
                    logger.info('There are no SAI entities for migration.')
                    return
                self.publish_to_mbus(itoa_entities, publish_url)
                logger.info(
                    '%s entities successfully published to message bus',
                    len(itoa_entities))
            else:
                # Invalid publish URL: disable this input so it stops running.
                self.inputs_conf.update(ENTITY_MIGRATION_INPUT,
                                        {'disabled': 1})
                # NOTE: the _reload endpoint never worked, a hard restart is required to actualy stop this script
                # from running
                self.inputs_conf.reload()
                logger.info(
                    'disabled and reloaded entity migration input stanza')
        except Exception as e:
            logger.error(
                'Failed to run entity migration modular input -- Error: %s',
                e)
            link_to_error = em_utils.get_check_internal_log_message()
            # Surface the failure in the Splunk UI as a persistent message.
            self.splunkd_messages_service.create(
                'entity-migration-failure',
                severity='warn',
                value='Failed to migrate entities to ITSI. ' + link_to_error)

    def is_migration_job_disabled(self):
        """Return True when migration should not run: either no 'job' stanza
        was supplied or the inputs.conf stanza is disabled."""
        # check if the job stanza exists
        job_stanza = self.inputs.get('job')
        if job_stanza is None:
            logger.warning('No valid job stanza found. Exiting...')
            return True
        conf_job_stanza = self.inputs_conf.get(ENTITY_MIGRATION_INPUT)
        # A stanza missing an explicit 'disabled' key is treated as enabled
        # instead of raising KeyError.
        if is_true(conf_job_stanza.get('disabled', '0')):
            logger.info('job stanza is disabled. Exiting...')
            return True
        return False

    def prepare_itoa_entities(self):
        """
        Check if kvstore is ready, if not simply exit

        If ready, load all SAI entities and convert them into ITOA entities
        :return: itoa_entities
        """
        try:
            em_common.check_kvstore_readiness(session['authtoken'])
        except em_common.KVStoreNotReadyException as e:
            logger.error(
                'Migrate SAI entities to ITSI failed because KVStore is not ready - Error: %s',
                e)
            # SystemExit deliberately bypasses do_execute's generic
            # Exception handler.
            sys.exit(1)

        all_entities = EmEntity.load(0, 0, '', 'asc')
        return self._convert_to_itoa(all_entities)

    def _convert_to_itoa(self, sai_entities):
        """
        Convert SAI entities to the ITOA entity model.

        Entities that fail title/identifier-dimension validation are skipped
        entirely; invalid non-identifier dimensions are dropped per entity.

        :param sai_entities: iterable of EmEntity objects
        :return: list of ITOA Entity objects
        """
        itoa_entities = []
        for sai_entity in sai_entities:
            if not self._should_publish_entity_to_itsi(sai_entity):
                continue
            filtered_dims = self._filter_invalid_dimensions_of_entity(
                sai_entity.dimensions)
            # Identifier dimensions become ITSI aliases; they are guaranteed
            # valid (hence present in filtered_dims) by the check above.
            aliases = {
                k: filtered_dims[k]
                for k in sai_entity.identifier_dimension_names
            }
            # entity_type_ids is a list of strings
            entity_type_ids = []
            if sai_entity.entity_class in ENTITY_CLASS_TO_ENTITY_TYPE_IDS:
                entity_type_ids = [
                    ENTITY_CLASS_TO_ENTITY_TYPE_IDS[sai_entity.entity_class]
                ]
            itoa_entities.append(Entity({
                'unique_id': sai_entity.key,
                'aliases': aliases,
                'title': sai_entity.title,
                'informational': filtered_dims,
                'entity_type_ids': entity_type_ids,
                'creation_time': 0,
                'updated_time': sai_entity.mod_time,
            }))

        return itoa_entities

    def _should_publish_entity_to_itsi(self, entity):
        """
        if identifier dimensions contains invalid characters then don't migrate that entity at all
        because of the close association between entity _key and identifier dimensions and how alias
        is used to attribute KPIs and other calculation results to specific entity
        """
        # only publish active entities to itsi
        if entity.status != EmEntity.ACTIVE:
            return False

        # validate title (validation rules from itsi_entity.py)
        if ((not entity.title.strip()) or
                self.regex_entity_title_invalid_chars.search(entity.title)):
            logger.warning(
                'SAI entity %s will not be published to message bus because '
                'its title contains invalid characters. '
                'Invalid characters are single quotes (\'), double quotes (") and equal sign (=)',
                entity.title)
            return False

        # validate dims (validation rules from itsi_entity.py)
        for dim in entity.identifier_dimension_names:
            if not self._is_dimension_name_valid_for_itsi(dim):
                logger.warning(
                    'SAI entity %s will not be published to message bus because '
                    'its identifier dimension "%s" contains invalid characters. '
                    'Invalid characters are single quotes (\'), double quotes ("), $ (as first character), '
                    'equal sign (=), period (.), and commas (,)',
                    entity.title, dim)
                return False
        return True

    def _filter_invalid_dimensions_of_entity(self, dimensions):
        """
        Filter out invalid dimensions of the given dimensions dict. This method
        does not modify the input dict.

        :param dimensions: a dimensions dict
        :return a new dimensions dict with invalid dim filtered out
        """
        return {
            dim_name: dim_value
            for dim_name, dim_value in dimensions.items()
            if self._is_dimension_name_valid_for_itsi(dim_name)
        }

    def _is_dimension_name_valid_for_itsi(self, dimension_name):
        """Return True when the dimension name contains no ITSI-invalid
        characters and is not an ITSI-internal keyword."""
        if self.regex_entity_dims_invalid_chars.search(dimension_name):
            return False

        if dimension_name in ITSI_ENTITY_INTERNAL_KEYWORD:
            return False

        return True

    def publish_to_mbus(self, itoa_entities, url):
        """Serialize entities to raw dicts and batch-post them to `url`."""
        entities_list = [entity.raw_data() for entity in itoa_entities]
        self._batch_save_to_mbus(data=entities_list, url=url)

    def _batch_save_to_mbus(self, data, url):
        """
        Perform multiple save operations in a batch

        :param data: non-empty list of raw entity dicts
        :param url: message bus publish URL
        :raises ArgValidationException: when `data` is empty
        """
        if not data:
            raise ArgValidationException(
                _('Batch saving failed: Batch is empty.'))

        batches = (data[x:x + DEFAULT_BATCH_SIZE]
                   for x in range(0, len(data), DEFAULT_BATCH_SIZE))
        for batch in batches:
            try:
                payload = {
                    "publisher": "Splunk App for Infrastructure",
                    "entities": batch
                }
                response, content = splunk.rest.simpleRequest(
                    url,
                    method='POST',
                    sessionKey=session['authtoken'],
                    jsonargs=json.dumps(payload))
                if response.status != 200:
                    logger.error(
                        "Failed to publish entities to message bus -- status:%s content:%s",
                        response.status, content)
            except Exception as e:
                logger.error(e)
                # Bare raise preserves the original traceback.
                raise
# Example 2
class AWSInputRestarter(BaseModularInput):
    """
    AWS Input Restarter.

    Restarts (disables then re-enables) the first configured AWS CloudWatch
    input to work around TA-AWS bugs that stop newly added EC2 instances
    from being detected.
    """

    app = em_constants.APP_NAME
    name = "aws_input_restarter"
    title = "Splunk App for Infrastructure - AWS Input Restarter"
    description = "Restarts certain AWS inputs to workaround TA-AWS bugs"
    use_external_validation = False
    use_kvstore_checkpointer = False
    use_hec_event_writer = False
    use_single_instance = True

    def __init__(self):
        super(AWSInputRestarter, self).__init__()
        # Wired up in do_additional_setup(); None until then.
        self.splunkd_messages_service = None

    def extra_arguments(self):
        """Declare the extra input-stanza arguments this modinput accepts."""
        return [{
            'name': 'log_level',
            'title': 'Log level',
            'description':
            'The logging level of the modular input. Defaults to INFO',
            'required_on_create': False,
            'data_type': Argument.data_type_string
        }]

    def do_additional_setup(self):
        """Configure logging and the splunkd messages service."""
        # Default to an empty stanza instead of raising AttributeError on
        # None when 'restarter' is missing.
        log_level = self.inputs.get('restarter', {}).get('log_level', 'INFO')
        logger.setLevel(log.parse_log_level(log_level))
        self.splunkd_messages_service = Service(port=getDefault('port'),
                                                token=self.session_key,
                                                app=em_constants.APP_NAME,
                                                owner='nobody').messages

    def do_execute(self):
        """Fetch the first AWS CloudWatch input and restart it by toggling
        its 'disabled' flag. Runs only on the captain node."""
        try:
            if not em_common.modular_input_should_run(self.session_key):
                logger.info(
                    "Skipping aws_input_restarter modinput execution on non-captain node."
                )
                return

            request = self._generate_cloudwatch_input_request('GET')

            logger.info('Fetching AWS CloudWatch inputs...')
            response = json.loads(urlopen(request).read())

            # If there's an input, disable then enable it
            if not response.get('entry', []):
                logger.info('No AWS CloudWatch inputs found, exiting...')
                return

            input_name = response['entry'][0]['name']
            logger.info('Attempting to restart AWS CloudWatch input: ' +
                        input_name)
            disable_request = self._generate_cloudwatch_input_request(
                'POST', data={'disabled': 1}, name=input_name)

            enable_request = self._generate_cloudwatch_input_request(
                'POST', data={'disabled': 0}, name=input_name)

            logger.info('Disabling AWS CloudWatch input: ' + input_name)
            # Parse the body so a malformed/error response still surfaces
            # as a failure below; the parsed value itself is not needed.
            json.loads(urlopen(disable_request).read())

            logger.info('Enabling AWS CloudWatch input: ' + input_name)
            json.loads(urlopen(enable_request).read())

            logger.info('Modular input execution complete!')
        except HTTPError as err:
            if err.code == 404:
                logger.warning(
                    'AWS TA is not installed. Cannot run aws_input_restarter.')
                return
            # Previously non-404 HTTP errors were silently swallowed here;
            # report them like any other failure.
            logger.error(
                'AWS Input Restarter Modular input execution failed: HTTP %s - %s',
                err.code, err)
            self._notify_failure()
        except Exception:
            error_type, error, tb = sys.exc_info()
            message = 'AWS Input Restarter Modular input execution failed: ' + str(
                error)
            logger.error(message + '\nTraceback:\n' +
                         ''.join(traceback.format_tb(tb)))
            self._notify_failure()

    def _notify_failure(self):
        """Post a persistent warning message to splunkd so the failure is
        visible in the Splunk UI."""
        link_to_error = get_check_internal_log_message()
        self.splunkd_messages_service.create(
            'aws-input-restarter-failure',
            severity='warn',
            value=(
                'Failed to restart AWS data collection inputs.'
                ' Newly added EC2 instances will cease to be detected. ' +
                link_to_error))

    def _generate_cloudwatch_input_request(self, method, data=None, name=None):
        """
        Build a Request against the TA-AWS CloudWatch input REST endpoint.

        :param method: HTTP method to use ('GET' or 'POST')
        :param data: optional dict, form-encoded into the request body
        :param name: optional input name appended to the endpoint path
        :return: urllib Request with its method forced to `method`
        """
        base_url = '%s/servicesNS/nobody/Splunk_TA_aws/splunk_ta_aws_aws_cloudwatch/%s?%s'
        # NOTE(review): the body is form-encoded yet the Content-Type claims
        # JSON; splunkd appears to tolerate this, but the mismatch looks
        # accidental — confirm before changing.
        headers = {
            'Authorization': 'Splunk %s' % self.session_key,
            'Content-Type': 'application/json'
        }

        # Handle the query params that are passed in
        server_uri = em_common.get_server_uri()
        query_params = dict(output_mode='json')
        query_params['count'] = 1
        query_params['offset'] = 0

        # Build the URL and make the request
        url = base_url % (server_uri, name or '', urlencode(query_params))
        request = Request(url,
                          to_bytes(urlencode(data)) if data else None,
                          headers=headers)
        # urllib infers GET/POST from the body; force the requested method.
        request.get_method = lambda: method

        return request