Beispiel #1
0
def test_custom_metrics_multiple_results(aggregator, check):
    """A custom query returning several rows submits one gauge per row.

    Each submitted metric is expected to carry the row's tag column value,
    the tags declared on the query, and the tags given to the QueryManager.
    """
    rows = [["tag_value1", "1"], ["tag_value2", "2"]]

    fake_connection = mock.MagicMock()
    fake_cursor = mock.MagicMock()
    # Hand out a fresh iterator on every fetchall() call.
    fake_cursor.fetchall.side_effect = lambda: iter(rows)
    fake_connection.cursor.return_value = fake_cursor

    tag_column = {"name": "tag_name", "type": "tag"}
    gauge_column = {"name": "metric", "type": "gauge"}
    check.instance['custom_queries'] = [
        {
            "metric_prefix": "oracle.test1",
            "query": "mocked",
            "columns": [tag_column, gauge_column],
            "tags": ["query_tags1"],
        }
    ]
    check._fix_custom_queries()
    check._connection = fake_connection

    manager = QueryManager(check, check.execute_query_raw, tags=['custom_tag'])
    manager.compile_queries()
    manager.execute()

    # One assertion per mocked row: (expected gauge value, expected tag value).
    for expected_value, tag_value in ((1, "tag_value1"), (2, "tag_value2")):
        aggregator.assert_metric(
            "oracle.test1.metric",
            value=expected_value,
            count=1,
            tags=["tag_name:" + tag_value, "query_tags1", "custom_tag"],
        )
    def test_queries_are_copied(self):
        """Two managers built from the same query dict must not share the compiled query."""

        class MyCheck(AgentCheck):
            pass

        checks = [MyCheck('test', {}, [{}]) for _ in range(2)]
        shared_query = {
            'name': 'test query',
            'query': 'foo',
            'columns': [
                {'name': 'test.foo', 'type': 'gauge', 'tags': ['override:ok']},
                {'name': 'test.baz', 'type': 'gauge', 'raw': True},
            ],
            'tags': ['test:bar'],
        }

        # Both managers receive the very same dict object as their only query.
        managers = [
            QueryManager(owner, mock_executor(), [shared_query])
            for owner in checks
        ]
        for manager in managers:
            manager.compile_queries()

        first_query = managers[0].queries[0]
        second_query = managers[1].queries[0]
        assert first_query is not second_query, "QueryManager does not copy the queries"
Beispiel #3
0
class IbmICheck(AgentCheck, ConfigMixin):
    """Agent check that runs its queries through a long-lived helper subprocess.

    The helper is started with ``datadog_checks.ibm_i.query_script.query`` and
    is driven over pipes: the connection string and then each query text are
    written to its stdin, JSON-encoded rows are read back from its stdout
    until an ``ENDOFQUERY`` line, and anything found on its stderr is treated
    as a query error.
    """

    SERVICE_CHECK_NAME = "ibm_i.can_connect"

    def __init__(self, name, init_config, instances):
        super(IbmICheck, self).__init__(name, init_config, instances)

        # All of these are created lazily, see the matching properties.
        self._connection_string = None
        self._subprocess = None
        self._query_manager = None
        # Number of query errors reported by the query manager during the current run.
        self._current_errors = 0
        self.check_initializations.append(self.set_up_query_manager)

    def check(self, _):
        """Execute the queries and submit the ``ibm_i.can_connect`` service check.

        The status is OK when the query manager ran without raising and no
        query error was recorded, CRITICAL when an exception was raised or at
        least one query error was recorded. When an ``AttributeError`` is
        raised and no query error was recorded, only a warning is emitted and
        no service check is submitted.
        """
        self._current_errors = 0

        try:
            self.query_manager.execute()
            check_status = AgentCheck.OK
        except AttributeError as e:
            self.warning(
                'Could not set up query manager, skipping check run: %s', e)
            check_status = None
        except Exception as e:
            self._delete_connection_subprocess(e)
            check_status = AgentCheck.CRITICAL

        # At least one query failed, set the service check as failing
        if self._current_errors:
            check_status = AgentCheck.CRITICAL

        if check_status is not None:
            # The query manager can still be missing here (e.g. setting it up
            # raised something other than AttributeError). Fall back to the
            # configured hostname, if any, instead of crashing while reporting.
            if self._query_manager is not None:
                hostname = self._query_manager.hostname
            else:
                hostname = self.config.hostname or None

            self.service_check(
                self.SERVICE_CHECK_NAME,
                check_status,
                tags=self.config.tags,
                hostname=hostname,
            )

    def cancel(self):
        """Clean up the connection subprocess when the check gets cancelled."""
        self._delete_connection_subprocess("check cancelled")

    def handle_query_error(self, error):
        """Count a query error for the current run and return it unchanged."""
        self._current_errors += 1
        return error

    @property
    def connection_subprocess(self):
        """The helper subprocess, started on first access.

        May be ``None`` if the subprocess had to be deleted right after
        being started (broken stdin pipe).
        """
        if self._subprocess is None:
            self._create_connection_subprocess()
        return self._subprocess

    def _create_connection_subprocess(self):
        """Start the helper subprocess and send it the connection string."""
        self._subprocess = subprocess.Popen(
            [
                sys.executable,
                "-c",
                "from datadog_checks.ibm_i.query_script import query; query()",
            ],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        # Set stdout reader as non-blocking, we don't want to
        # block .read() calls to be able to time out.
        fl = fcntl.fcntl(self._subprocess.stdout.fileno(), fcntl.F_GETFL)
        fcntl.fcntl(self._subprocess.stdout, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Set stderr reader as non-blocking, we don't want to
        # wait until EOF is sent, we only want to read whatever is there when
        # we try to return errors.
        fl = fcntl.fcntl(self._subprocess.stderr.fileno(), fcntl.F_GETFL)
        fcntl.fcntl(self._subprocess.stderr, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        try:
            print(self.connection_string,
                  file=self._subprocess.stdin,
                  flush=True)
        except BrokenPipeError as e:
            # The stdin pipe is broken, usually due to the Agent
            # killing the subprocess when stopping.
            # Clean up then return.
            self._delete_connection_subprocess(e)

    def _delete_connection_subprocess(self, message=None):
        """Terminate the helper subprocess, if there is one, and forget it.

        ``message`` is the optional reason (a string or an exception) that is
        included in the debug log line.
        """
        if self._subprocess:
            self.log.debug("Deleting connection: %s", message)
            # Kill only a process that is still running, then reap it once.
            # Looping on a falsy return code would never end for a process
            # that has already exited with status 0.
            if self._subprocess.poll() is None:
                self._subprocess.kill()
            self._subprocess.wait()

        self._subprocess = None

    def execute_query(self, query, disconnect_on_error=True):
        """Send a query to the helper subprocess and yield its decoded rows.

        ``query`` is a mapping with a ``'text'`` entry (written to the
        subprocess stdin) and a ``'timeout'`` entry in seconds. Every
        non-empty stdout line is JSON-decoded and yielded until an
        ``ENDOFQUERY`` line is read or the timeout elapses.

        If a pipe to the subprocess is broken, the subprocess is deleted and
        the generator simply stops. An ``Exception`` is raised when the
        subprocess wrote to stderr or when the query timed out; in both cases
        the subprocess is deleted first unless ``disconnect_on_error`` is
        false. A line that cannot be JSON-decoded deletes the subprocess and
        re-raises the decoding error.
        """
        try:
            # Write query
            print(query['text'],
                  file=self.connection_subprocess.stdin,
                  flush=True)
        except BrokenPipeError as e:
            # The stdin pipe is broken, usually due to the Agent
            # killing the subprocess when stopping.
            # Clean up then return.
            self._delete_connection_subprocess(e)
            return

        done = False
        query_start = datetime.now()

        while not done and (datetime.now() -
                            query_start).total_seconds() <= query['timeout']:
            # Sleep for a bit to wait for results & avoid being a busy loop
            time.sleep(0.2)
            try:
                # To avoid blocking never use a pipe's file descriptor iterator. See https://bugs.python.org/issue3907
                for line in iter(self.connection_subprocess.stdout.readline,
                                 ''):
                    stripped_line = line.strip()
                    if stripped_line == "":
                        # Empty line, skip
                        continue
                    if stripped_line == "ENDOFQUERY":
                        done = True
                        break
                    try:
                        yield json.loads(stripped_line)
                    except Exception as e:
                        # We didn't manage to parse the line provided by the subprocess
                        # Remove subprocess to restart on a clean state and raise.
                        self._delete_connection_subprocess(e)
                        raise
            except TypeError as e:
                # We couldn't read anything
                self.log.debug("Could not read from stdout pipe: %s", e)
                continue
            except BrokenPipeError as e:
                # The stdout pipe is broken, usually due to the Agent
                # killing the subprocess when stopping.
                # Clean up then return.
                self._delete_connection_subprocess(e)
                return

        err = None
        try:
            err = self.connection_subprocess.stderr.read().strip()
        except TypeError as e:
            # We couldn't read anything
            self.log.debug("Could not read from stderr pipe: %s", e)
        except BrokenPipeError as e:
            # The stderr pipe is broken, usually due to the Agent
            # killing the subprocess when stopping.
            # Clean up then return.
            self._delete_connection_subprocess(e)
            return

        # disconnect_on_error can be set to False for queries we
        # expect to fail and where we don't want to disconnect.
        if err:
            if disconnect_on_error:
                self._delete_connection_subprocess(err)
            raise Exception(err)

        if not done:
            if disconnect_on_error:
                self._delete_connection_subprocess(
                    "Timed out after {} seconds".format(query['timeout']))
            raise Exception("Timed out after {} seconds".format(
                query['timeout']))

    @property
    def connection_string(self):
        """The connection string sent to the helper subprocess, built once and cached.

        The configured ``connection_string`` is used as is when set; otherwise
        one is assembled from the configured driver, system, username and
        password.
        """
        if self._connection_string is None:
            # https://www.connectionstrings.com/as-400/
            # https://www.ibm.com/support/pages/odbc-driver-ibm-i-access-client-solutions
            connection_string = self.config.connection_string
            if not connection_string:
                # The driver name is always wrapped in exactly one pair of braces.
                connection_string = f'Driver={{{self.config.driver.strip("{}")}}};'

                if self.config.system:
                    connection_string += f'System={self.config.system};'

                if self.config.username:
                    connection_string += f'UID={self.config.username};'

                if self.config.password:
                    connection_string += f'PWD={self.config.password};'
                    self.register_secret(self.config.password)

            self._connection_string = connection_string

        return self._connection_string

    @property
    def query_manager(self):
        """The query manager, set up on first access.

        Still ``None`` afterwards if the system info could not be fetched.
        """
        if self._query_manager is None:
            self.set_up_query_manager()
        return self._query_manager

    def set_up_query_manager(self):
        """Build and compile the QueryManager for the configured queries.

        Does nothing when the system info cannot be fetched. Raises
        ``ConfigurationError`` for a query name that is neither handled
        specially here nor present in the query map.
        """
        system_info = self.fetch_system_info()
        if system_info:
            query_list = []
            QUERY_MAP = queries.query_map(self.config)
            is_7_3_or_higher = system_info.os_version > 7 or (
                system_info.os_version == 7 and system_info.os_release >= 3)

            for query in self.config.queries:
                if query.name == "disk_usage":
                    # disk_usage works differently on 7.2 vs 7.3
                    if is_7_3_or_higher:
                        query_list.append(
                            queries.get_base_disk_usage_73(
                                self.config.query_timeout))
                        query_list.append(
                            queries.get_disk_usage(self.config.query_timeout))
                    else:
                        query_list.append(
                            queries.get_base_disk_usage_72(
                                self.config.query_timeout))
                elif query.name == "subsystem":
                    # subsystem is only supported on 7.3
                    if is_7_3_or_higher:
                        query_list.append(
                            queries.get_subsystem_info(
                                self.config.query_timeout))
                    else:
                        # For backwards compatibility, we don't fail
                        self.log.info(
                            "Skipping 'subsystem' query since target system is older than 7.3"
                        )
                elif query.name not in QUERY_MAP:
                    raise ConfigurationError(
                        "Unknown or unsupported query name: {}".format(
                            query.name))
                else:
                    query_list.append(QUERY_MAP[query.name])

            hostname = system_info.hostname
            # Override hostname with configuration
            if self.config.hostname:
                hostname = self.config.hostname

            self._query_manager = QueryManager(
                self,
                self.execute_query,
                tags=self.config.tags,
                queries=query_list,
                hostname=hostname,
                error_handler=self.handle_query_error,
            )
            self._query_manager.compile_queries()

    def fetch_system_info(self):
        """Return the remote system info, or ``None`` (after logging) on any error."""
        try:
            return self.system_info_query()
        except Exception as e:
            # In case of errors, the connection will have already been cleaned by execute_query.
            # We only log the error.
            self.log.error("Failed to fetch system info: %s", e)
            return None

    def system_info_query(self):
        """Query the host name, OS version and OS release of the remote system.

        Returns a ``SystemInfo``, or ``None`` (after logging an error) when
        the result is not exactly one row of three columns or when the
        version or release is not an integer. Exceptions raised while
        executing the query are propagated.
        """
        query = {
            'text':
            "SELECT HOST_NAME, OS_VERSION, OS_RELEASE FROM SYSIBMADM.ENV_SYS_INFO",
            'timeout': self.config.query_timeout,
        }
        results = list(self.execute_query(query))  # type: List[Tuple[str]]
        if len(results) == 0:
            self.log.error("Couldn't find system info on the remote system.")
            return None
        if len(results) > 1:
            self.log.error(
                "Too many results returned by system query. Expected 1, got %d",
                len(results))
            return None

        info_row = results[0]
        if len(info_row) != 3:
            self.log.error("Expected 3 columns in system info query, got %d",
                           len(info_row))
            return None

        hostname = info_row[0]
        try:
            os_version = int(info_row[1])
        except ValueError:
            self.log.error("Expected integer for OS version, got %s",
                           info_row[1])
            return None

        try:
            os_release = int(info_row[2])
        except ValueError:
            self.log.error("Expected integer for OS release, got %s",
                           info_row[2])
            return None

        return SystemInfo(hostname=hostname,
                          os_version=os_version,
                          os_release=os_release)