def test_thresholds(mock_print, exit, thresholds, run_check, get_multi_float,
                    calculate_mean, get_instance_addresses, get_host_address,
                    generate_perfdata, generate_check_id,
                    check_thresholds_and_exit, logging_utils):
    """main() must pass the calculated mean, built thresholds and perfdata
    through to check_thresholds_and_exit with the expected arguments.

    The arithmetic_mean approach is temporarily replaced with the
    calculate_mean mock so the mean value is controlled by the test.
    """
    logger = FakeLogger()
    logging_utils.Logger.return_value = logger

    returned_thresholds = 'thresholds'
    thresholds.return_value = returned_thresholds

    float_values = 1.5, 2.0
    get_multi_float.return_value = float_values

    mean = 1.75
    calculate_mean.return_value = mean

    # Save the real approach from the module under test. The original code
    # read calculate_mean.APPROACHES['arithmetic_mean'] — an auto-created
    # MagicMock attribute — so the "restore" below wrote a junk mock back
    # into check_snmp_aggregate.APPROACHES, leaking into other tests.
    old_calculate_mean = check_snmp_aggregate.APPROACHES['arithmetic_mean']
    check_snmp_aggregate.APPROACHES['arithmetic_mean'] = calculate_mean
    try:
        instance_addresses = '192.0.2.5', '192.0.2.6'
        get_instance_addresses.return_value = instance_addresses

        perfdata = 'performance_data'
        generate_perfdata.return_value = perfdata

        low_warn = 2
        low_crit = 1
        high_warn = 3
        high_crit = 4
        node = 'therealhost'
        oids = 'somefakeoid,other'
        target_type = 'thetypeoftarget'

        check_snmp_aggregate.main([
            '--node', node,
            '--oids', oids,
            '--approach', 'arithmetic_mean',
            '--unknown', 'ignore',
            '--target-type', target_type,
            '--low-warning', text_type(low_warn),
            '--low-critical', text_type(low_crit),
            '--high-warning', text_type(high_warn),
            '--high-critical', text_type(high_crit)
        ])

        check_thresholds_and_exit.assert_called_once_with(
            mean, returned_thresholds, perfdata, False,
        )
        thresholds.assert_called_once_with(
            low_warn, low_crit, high_warn, high_crit, logger,
        )
    finally:
        # Restore unconditionally: the original only restored on the success
        # path, so a failure inside main() left the mock installed.
        check_snmp_aggregate.APPROACHES['arithmetic_mean'] = old_calculate_mean
def test_run_check_ignore_not_unknown(mock_print, exit, mock_subproc, mock_os,
                                      mock_get_types):
    """A failing check whose status is not UNKNOWN must still have its
    output printed, an error logged, and the process exited with the
    subprocess's status — even when ignore_unknown is True."""
    fake_logger = FakeLogger()

    # The check script appears present on disk.
    mock_os.path.exists.return_value = True
    mock_os.path.isfile.return_value = True

    status = 42
    output = 'badexitoutput'
    mock_subproc.side_effect = subprocess.CalledProcessError(
        status, 'notarealcommand', output,
    )

    nagios_plugin_utils.run_check(
        'something',          # script_path
        'thistargettype',     # target_type
        'thehost',            # hostname
        'theoid',             # oid
        fake_logger,
        True,                 # ignore_unknown
    )

    printed = mock_print.call_args_list[0][0][0].lower()
    assert printed == output
    fake_logger.string_appears_in(
        'error',
        ('unknown or error', 'status', text_type(status), 'output', output),
    )
    exit.assert_called_once_with(status)
def test_thresholds(mock_print, exit, thresholds, run_check, get_perfdata,
                    get_single_float, get_instance_rate_path, calculate_rate,
                    check_thresholds_and_exit, logging_utils):
    """main() must hand the fetched float, the built thresholds object and
    the perfdata straight to check_thresholds_and_exit, and must construct
    the thresholds from the CLI limit arguments."""
    fake_logger = FakeLogger()
    logging_utils.Logger.return_value = fake_logger

    built_thresholds = 'thresholds'
    thresholds.return_value = built_thresholds

    expected_perfdata = 'theperfdata'
    get_perfdata.return_value = expected_perfdata

    expected_value = 1.234
    get_single_float.return_value = expected_value

    warn_low, crit_low, warn_high, crit_high = 2, 1, 3, 4

    check_snmp_numeric.main([
        '--hostname', 'therealhost',
        '--oid', 'somefakeoid',
        '--target-type', 'thetypeoftarget',
        '--low-warning', text_type(warn_low),
        '--low-critical', text_type(crit_low),
        '--high-warning', text_type(warn_high),
        '--high-critical', text_type(crit_high)
    ])

    check_thresholds_and_exit.assert_called_once_with(
        expected_value, built_thresholds, expected_perfdata, False,
    )
    thresholds.assert_called_once_with(
        warn_low, crit_low, warn_high, crit_high, fake_logger,
    )
def check_no_insecure_ssl_protos(address, port=443):
    """Assert that the server at *address* rejects every insecure protocol.

    For each entry in INSECURE_PROTOCOLS, attempt a handshake using that
    protocol; the server is expected to refuse with an ssl.SSLError whose
    message contains the protocol's expected error fragment.

    :param address: hostname or IP address to probe.
    :param port: TCP port to connect to. New parameter; defaults to the
        previously hard-coded 443, so existing callers are unaffected.
    :raises AssertionError: if an insecure protocol is accepted, or the
        refusal error does not contain the expected fragment.
    """
    for proto_name, protocol in INSECURE_PROTOCOLS.items():
        try:
            # A successful handshake here means the insecure protocol was
            # accepted — that is the failure case.
            ssl.get_server_certificate((address, port), protocol['proto'])
            raise AssertionError(
                'An insecure SSL protocol was allowed for connection: '
                '{proto_name}'.format(proto_name=proto_name))
        except ssl.SSLError as err:
            # Expected path: the server refused the protocol.
            assert protocol['error'] in text_type(err)
def test_output_and_exit_rate(mock_print, exit):
    """With rate_check=True the printed line must start with 'SNMP RATE',
    contain the value/state/level, carry the perfdata after the pipe, and
    exit with the status mapped from the state."""
    expected_value = 42
    expected_perfdata = 'abc123'
    expected_state = 'OK'
    expected_level = 'somelevel'

    nagios_plugin_utils.output_and_exit(
        expected_value, expected_perfdata, expected_state, expected_level,
        True,
    )

    printed = mock_print.call_args_list[0][0][0]
    message, perf_section = printed.split('|')

    assert perf_section == expected_perfdata
    assert message.startswith('SNMP RATE')
    for fragment in (text_type(expected_value), expected_state,
                     expected_level):
        assert fragment in message

    exit.assert_called_once_with(
        nagios_plugin_utils.STATUS_DETAILS[expected_state][0])
def reconcile_monitoring(ctx, only_deployments=None, only_tenants=None):
    """Re-run the monitoring start operation for every monitored node so
    that nagios re-learns its targets.

    Iterates all tenants (optionally filtered by *only_tenants*), finds
    nodes with nagiosrest monitoring configuration, and launches the
    execute_operation workflow for cloudify.interfaces.monitoring.start on
    each affected deployment (optionally filtered by *only_deployments*).
    Workflow start failures are collected and reported per tenant.

    :param ctx: workflow context; only its logger is used here.
    :param only_deployments: optional list of deployment IDs to restrict to.
    :param only_tenants: optional list of tenant names to restrict to.
    """
    if not only_deployments:
        only_deployments = []
    if not only_tenants:
        only_tenants = []
    ctx.logger.info('Getting tenant list')
    tenants = [
        tenant['name']
        for tenant in get_entities(
            entity_type='tenants',
            tenant='default_tenant',
            properties=['name'],
            logger=ctx.logger,
        )
    ]

    problem_deployments = {}
    # True once any tenant yielded at least one deployment to start.
    # BUG FIX: the original checked the per-tenant `targets` dict after the
    # loop, which reflected only the LAST tenant processed — so the summary
    # claimed "Nothing needed to be done" whenever the final tenant happened
    # to have no monitored nodes, even if earlier tenants had work done.
    any_targets = False
    for tenant in tenants:
        if only_tenants and tenant not in only_tenants:
            ctx.logger.info('Skipping tenant {tenant}'.format(tenant=tenant, ))
            continue
        ctx.logger.info('Checking deployments for tenant {tenant}'.format(
            tenant=tenant, ))
        # deployment_id -> list of node ids needing monitoring restart
        targets = {}
        interesting_nodes = get_entities(
            entity_type='nodes',
            tenant=tenant,
            properties=['deployment_id', 'id'],
            logger=ctx.logger,
            include=_node_has_nagiosrest_properties,
        )
        ctx.logger.info(
            'Found {num} nodes with monitoring configuration'.format(
                num=len(interesting_nodes),
            ))
        # Log each skipped deployment only once even if it has many nodes.
        notified_skipped_deployments = []
        for node in interesting_nodes:
            dep_id = node['deployment_id']
            if only_deployments and dep_id not in only_deployments:
                if dep_id not in notified_skipped_deployments:
                    ctx.logger.info('Skipping deployment {dep}'.format(
                        dep=dep_id, ))
                    notified_skipped_deployments.append(dep_id)
                continue
            if dep_id not in targets:
                targets[dep_id] = []
            targets[dep_id].append(node['id'])

        if targets:
            any_targets = True
            for deployment, nodes in targets.items():
                ctx.logger.info(
                    'Starting monitoring for deployment {deployment}'.format(
                        deployment=deployment,
                    ))
                try:
                    run_workflow(
                        tenant=tenant,
                        deployment=deployment,
                        workflow_id='execute_operation',
                        parameters={
                            "node_ids": nodes,
                            "operation": (
                                "cloudify.interfaces.monitoring.start"),
                        },
                        allow_custom_parameters=False,
                        force=False,
                        logger=ctx.logger,
                    )
                except StartWorkflowFailed as err:
                    ctx.logger.error(
                        '{deployment} failed to start workflow: {err}'.format(
                            deployment=deployment,
                            err=text_type(err),
                        ))
                    if tenant not in problem_deployments:
                        problem_deployments[tenant] = []
                    problem_deployments[tenant].append(deployment)

    if any_targets:
        ctx.logger.info('All monitored instances not listed as problems '
                        'should be re-added to '
                        'nagios within a short time. See individual '
                        'deployments for execution states. '
                        'Problem messages state: '
                        'Tenant <name> had problems starting workflows, '
                        'and list which deployments had these problems. '
                        'If any of these appear you can re-run just those '
                        'deployments by using the only_deployments '
                        'argument.')
        if problem_deployments:
            for tenant in problem_deployments:
                ctx.logger.warn(
                    'Tenant {tenant} had problems starting workflows for '
                    'deployments: {deps}'.format(
                        tenant=tenant,
                        deps=','.join(problem_deployments[tenant]),
                    ))
        else:
            ctx.logger.info('No problems were reported starting workflows.')
    else:
        ctx.logger.warn('Nothing needed to be done. Either the combination '
                        'of tenant and deployment filtering left no targets '
                        'or there are no monitored deployments using the '
                        'nagiosrest plugin on the cloudify manager.')
def create(ctx):
    """Install and configure the managed nagios server on this machine.

    Performs, in order: SSL property validation, package installation,
    SELinux policy build/install for the SNMP trap handler, deployment of
    nagios plugin scripts and supporting libraries, deployment of the
    nagiosrest web application and its templates, deployment of the
    notification configuration script, creation of rate-data directories,
    and SSL key/certificate deployment (provided inline, from the
    blueprint, or freshly generated).

    :param ctx: cloudify node context providing properties and a logger.
    :raises NonRecoverableError: if only one of ssl_certificate/ssl_key
        is provided.
    """
    props = ctx.node.properties

    ctx.logger.info('Validating SSL properties')
    # Both or neither of the SSL inputs must be set.
    if bool(props['ssl_certificate']) != bool(props['ssl_key']):
        raise NonRecoverableError(
            'Either ssl_certificate and ssl_key must both be provided, '
            'or neither of them must be provided. '
            'ssl_certificate was: {ssl_certificate}; '
            'ssl_key was: {ssl_key}'.format(
                ssl_certificate=props['ssl_certificate'],
                ssl_key=props['ssl_key'],
            ))

    ctx.logger.info('Enabling EPEL (if required)')
    yum_install(text_type('epel-release'))

    ctx.logger.info('Installing required packages')
    yum_install([
        'mod_ssl',
        'nagios',
        'nagios-plugins-disk',
        'nagios-plugins-load',
        'nagios-plugins-ping',
        'nagios-plugins-snmp',
        'nagios-selinux',
        'net-snmp',
        'net-snmp-utils',
        'python-flask',
        'python-gunicorn',
        'python-jinja2',
        'python-requests',
        'selinux-policy-devel',
        'incron',
    ])

    ctx.logger.info('Deploying SELinux configuration')
    # Prepare SELinux context for trap handler: write the .te policy source
    # to a temp dir, compile it with the SELinux devel Makefile, install the
    # resulting .pp module, then clean up.
    tmp_path = tempfile.mkdtemp()
    with open(
        os.path.join(tmp_path, 'cloudify-nagios-snmp-trap-handler.te'),
        'w',
    ) as policy_handle:
        policy_handle.write(
            _decode_if_bytes(
                pkgutil.get_data(
                    'managed_nagios_plugin',
                    'resources/selinux/cloudify_nagios_snmp_trap_handler.te',
                )))
    run(['make', '-f', '/usr/share/selinux/devel/Makefile', '-C', tmp_path],
        sudo=True)
    run([
        'semodule', '-i',
        os.path.join(tmp_path, 'cloudify-nagios-snmp-trap-handler.pp')
    ], sudo=True)
    run(['rm', '-rf', tmp_path], sudo=True)

    ctx.logger.info('Deploying nagios plugins and SNMP trap handler')
    # Supporting libraries land next to the plugins; resources/scripts/
    # entries are flattened to their basenames.
    for supporting_lib in ('_compat.py',
                           'constants.py',
                           'utils.py',
                           'snmp_utils.py',
                           'nagios_utils.py',
                           'rest_utils.py',
                           'resources/scripts/nagios_plugin_utils.py',
                           'resources/scripts/logging_utils.py'):
        if supporting_lib.startswith('resources/scripts/'):
            destination_filename = supporting_lib[len('resources/scripts/'):]
        else:
            destination_filename = supporting_lib
        deploy_file(
            data=pkgutil.get_data(
                'managed_nagios_plugin',
                supporting_lib,
            ),
            destination='/usr/lib64/nagios/plugins/' + destination_filename,
            ownership='root.nagios',
            permissions='440',
            sudo=True,
        )

    # Executable check/handler scripts (mode 550, default ownership).
    for script in ('check_snmp_numeric',
                   'check_snmp_aggregate',
                   'check_group_aggregate',
                   'check_group_meta_aggregate',
                   'cloudify_nagios_snmp_trap_handler',
                   'notify_cloudify',
                   'check_nagios_command_file',
                   'check_snmptrap_checks'):
        source = os.path.join('resources/scripts/', script)
        script_content = pkgutil.get_data('managed_nagios_plugin', source)
        destination = os.path.join('/usr/lib64/nagios/plugins', script)
        deploy_file(
            data=script_content,
            destination=destination,
            permissions='550',
            sudo=True,
        )

    ctx.logger.info('Deploying nagiosrest')
    run(['mkdir', '-p', '/usr/local/www/nagiosrest'], sudo=True)
    # The nagiosrest application modules...
    for nagiosrest_file in ('nagiosrest.py',
                            'nagiosrest_group.py',
                            'nagiosrest_target.py',
                            'nagiosrest_tenant.py',
                            'logging_utils.py'):
        deploy_file(
            data=pkgutil.get_data(
                'managed_nagios_plugin',
                'resources/scripts/' + nagiosrest_file,
            ),
            destination='/usr/local/www/nagiosrest/' + nagiosrest_file,
            ownership='root.nagios',
            permissions='440',
            sudo=True,
        )
    # ...their supporting libraries...
    for supporting_lib in ('_compat.py',
                           'nagios_utils.py',
                           'utils.py',
                           'constants.py'):
        deploy_file(
            data=pkgutil.get_data(
                'managed_nagios_plugin',
                supporting_lib,
            ),
            destination='/usr/local/www/nagiosrest/' + supporting_lib,
            ownership='root.nagios',
            permissions='440',
            sudo=True,
        )
    # ...and the nagios object templates they render.
    for template in ('hostgroup.template',
                     'target.template',
                     'node.template',
                     'group.template',
                     'group_check.template',
                     'meta_group_check.template'):
        deploy_file(
            data=pkgutil.get_data(
                'managed_nagios_plugin',
                os.path.join('resources', template),
            ),
            destination='/usr/local/www/nagiosrest/' + template,
            ownership='root.nagios',
            permissions='440',
            sudo=True,
        )
    # systemd unit so nagiosrest runs under gunicorn as a service.
    deploy_file(
        data=pkgutil.get_data(
            'managed_nagios_plugin',
            'resources/base_configuration/systemd_nagiosrest.conf',
        ),
        destination='/usr/lib/systemd/system/nagiosrest-gunicorn.service',
        ownership='root.root',
        permissions='440',
        sudo=True,
    )

    ctx.logger.info('Deploying notification configuration script')
    deploy_file(
        data=pkgutil.get_data(
            'managed_nagios_plugin',
            'resources/scripts/update_notify_cloudify_configuration',
        ),
        destination='/usr/local/bin/update_notify_cloudify_configuration',
        ownership='root.root',
        permissions='500',
        sudo=True,
        # Must have the group of the agent user for reconcile operation to
        # work correctly
        template_params={'group': grp.getgrgid(os.getgid()).gr_name},
    )
    # Modules the notification script imports from /usr/local/bin.
    deploy_file(
        data=pkgutil.get_data(
            'managed_nagios_plugin',
            '_compat.py',
        ),
        destination='/usr/local/bin/_compat.py',
        ownership='root.root',
        permissions='400',
        sudo=True,
    )
    deploy_file(
        data=pkgutil.get_data(
            'managed_nagios_plugin',
            'utils.py',
        ),
        destination='/usr/local/bin/utils.py',
        ownership='root.root',
        permissions='400',
        sudo=True,
    )
    deploy_file(
        data=pkgutil.get_data(
            'managed_nagios_plugin',
            'constants.py',
        ),
        destination='/usr/local/bin/constants.py',
        ownership='root.root',
        permissions='400',
        sudo=True,
    )

    ctx.logger.info(
        'Creating directory structure for storing temporary rate data')
    for rate_dir in ('nodes', 'instances'):
        rate_storage_path = os.path.join(RATE_BASE_PATH, rate_dir)
        run(['mkdir', '-p', rate_storage_path], sudo=True)
        run(['chown', 'nagios.', rate_storage_path], sudo=True)
        # Restore the expected SELinux context on the new directory.
        run(['restorecon', rate_storage_path], sudo=True)

    if props['ssl_certificate']:
        # Inline PEM data is deployed directly; anything else is treated as
        # a blueprint-relative file reference.
        if props['ssl_certificate'].startswith("-----BEGIN CERTIFICATE-----"):
            deploy_file(
                data=props['ssl_key'],
                destination=SSL_KEY_PATH,
                ownership='root.root',
                permissions='440',
                sudo=True,
            )
            deploy_file(
                data=props['ssl_certificate'],
                destination=SSL_CERT_PATH,
                ownership='root.root',
                permissions='444',
                sudo=True,
            )
        else:
            download_and_deploy_file_from_blueprint(
                source=BLUEPRINT_SSL_KEY_PATH.format(
                    key_file=props['ssl_key'],
                ),
                destination=SSL_KEY_PATH,
                ownership='root.root',
                permissions='440',
                ctx=ctx,
            )
            download_and_deploy_file_from_blueprint(
                source=BLUEPRINT_SSL_CERT_PATH.format(
                    cert_file=props['ssl_certificate'],
                ),
                destination=SSL_CERT_PATH,
                ownership='root.root',
                permissions='444',
                ctx=ctx,
            )
    else:
        # No cert supplied: self-generate and expose the certificate via
        # runtime properties so other nodes can trust it.
        ctx.logger.info('Generating SSL certificate')
        generate_certs(SSL_KEY_PATH, SSL_CERT_PATH, ctx.logger)
        with open(SSL_CERT_PATH) as crt_handle:
            ctx.instance.runtime_properties['ssl_certificate'] = \
                crt_handle.read()

    ctx.logger.info('Reloading systemd configuration')
    reload_systemd_configuration()
def create(ctx):
    """Create a nagios target type from this node's properties.

    Validates the instance health check name, writes an SNMP connection
    config file using the most secure SNMP settings provided (v3 preferred
    over v2c), gathers the checks related via target_type_checks
    relationships, and deploys the target type configuration.

    :param ctx: cloudify node context providing properties, relationships
        and a logger.
    :raises NonRecoverableError: if instance_health_check is not a known
        command, or if no SNMP configuration is provided.
    """
    name = ctx.node.properties['name']
    # NOTE(review): the description is sourced from the 'alias' property.
    description = ctx.node.properties['alias']

    ctx.logger.info('Validating instance health check command')
    available_checks = (
        'do-not-check',
        'check-host-icmp',
    )
    if ctx.node.properties['instance_health_check'] not in available_checks:
        raise NonRecoverableError(
            'Command "{cmd}" specified by instance_health_check was invalid. '
            'Valid options are: {options}'.format(
                cmd=ctx.node.properties['instance_health_check'],
                options=', '.join(available_checks),
            ))

    ctx.logger.info('Using most secure available SNMP configuration')
    connection_config = ConfigParser()
    connection_config.add_section('snmp_params')
    snmp_props = ctx.node.properties['snmp_properties']
    snmp_params = None
    # Pick the most secure snmp settings provided
    if snmp_props['v3']['username'] is not None:
        # SNMPv3 with authentication and privacy (authPriv, SHA + AES).
        snmp_params = {
            'protocol': 3,
            'seclevel': 'authPriv',
            'authproto': 'SHA',
            'privproto': 'AES',
            'secname': snmp_props['v3']['username'],
            'authpasswd': snmp_props['v3']['auth_pass'],
            'privpasswd': snmp_props['v3']['priv_pass'],
        }
        # Context is optional for v3.
        if snmp_props['v3'].get('context'):
            snmp_params['context'] = snmp_props['v3']['context']
    elif snmp_props['v2c']['community'] is not None:
        # Fall back to community-based SNMPv2c.
        snmp_params = {
            'protocol': '2c',
            'community': snmp_props['v2c']['community'],
        }
    if snmp_params is None:
        raise NonRecoverableError(
            'Currently checks require SNMP configuration.')
    else:
        for key, value in snmp_params.items():
            connection_config.set('snmp_params', key, text_type(value))

    # ConfigParser.write needs a file-like object; _FakeFile captures the
    # serialized text so it can be deployed as a string.
    connection_config_text = _FakeFile()
    connection_config.write(connection_config_text)
    deploy_file(
        data=text_type(connection_config_text),
        destination=get_connection_config_location(name),
        sudo=True,
    )

    ctx.logger.info('Getting related checks')
    check_relationships = get_all_relationship_targets(
        ctx=ctx,
        target_relation_type='target_type_checks',
        no_target_error=('Target types must be connected to 1+ checks with '
                         'relationship {target_relation_type}'),
    )

    ctx.logger.info('Deploying configuration')
    create_target_type(
        ctx.logger,
        name,
        description,
        check_relationships,
        instance_failure_reaction=ctx.node.
        properties['action_on_instance_failure'],
        instance_health_check=ctx.node.properties['instance_health_check'],
        check_interval=ctx.node.properties['check_interval'],
        retry_interval=ctx.node.properties['retry_interval'],
        max_check_retries=ctx.node.properties['max_check_retries'],
    )