Ejemplo n.º 1
0
def test_scale_down_on_group_threshold_breach():
    """Expect a 'scale' execution on basegroup1 after its test values change.

    Works inside a dedicated tenant: deploys nagios from nagios-groups.yaml,
    installs the basegroup1 and basegroup2 blueprints, overwrites the test
    integer and counter files by running commands against basegroup1, waits
    for a 'scale' execution on basegroup1 and then removes what it created.
    """
    tenant_name = 'test_group_breach_scale_down'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path('nagios-groups.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    # (blueprint file, deployment name) pairs, handled in this order.
    group_deployments = (
        ('basegroup1.yaml', 'basegroup1'),
        ('basegroup2.yaml', 'basegroup2'),
    )
    for blueprint_file, deployment in group_deployments:
        blueprint_path = utils.get_examples_blueprint_path(blueprint_file)
        vm_inputs = utils.get_monitored_vms_inputs(settings)
        utils.install_blueprint(
            blueprint_path, vm_inputs, deployment, tenant_client)

    utils.execute_arbitrary_command(
        'basegroup1', 'echo 2 > /tmp/cloudifytestinteger', tenant_client)
    counter_command = 'echo {time_now}:2.0 > /tmp/cloudifytestcounter'.format(
        time_now=int(time.time()))
    utils.execute_arbitrary_command(
        'basegroup1', counter_command, tenant_client)

    # The value changes above are expected to result in a scale workflow.
    utils.wait_for_execution_on_deployment(
        'scale', 'basegroup1', tenant_client)

    # Tear down: monitored deployments first, then nagios, the plugins, the
    # secrets and finally the tenant itself.
    for _, deployment in group_deployments:
        utils.remove_deployment(deployment, tenant_client)
        utils.delete_blueprint(deployment, tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_ignore_unreachable():
    """Expect a 'scale' execution even though one node has snmpd stopped.

    Works inside a dedicated tenant: deploys nagios from
    nagios-aggregates.yaml, installs baseaggregate, stops snmpd on the first
    base_aggregate_host node instance, writes a zero counter value and then
    waits for a 'scale' execution on baseaggregate before cleaning up.
    """
    tenant_name = 'test_aggregate_ignore_unreachable'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path(
        'nagios-aggregates.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    aggregate_blueprint = utils.get_examples_blueprint_path(
        'baseaggregate.yaml')
    vm_inputs = utils.get_monitored_vms_inputs(settings)
    utils.install_blueprint(
        aggregate_blueprint, vm_inputs, 'baseaggregate', tenant_client)

    # Stop SNMP on the first instance of the aggregate host node.
    first_host = utils.get_first_node_instance(
        'base_aggregate_host', 'baseaggregate', tenant_client)
    utils.execute_arbitrary_command(
        'baseaggregate', 'sudo service snmpd stop', tenant_client, first_host)

    # A zero counter value is what should trigger the scale down.
    counter_command = 'echo {time_now}:0 > /tmp/cloudifytestcounter'.format(
        time_now=int(time.time()))
    utils.execute_arbitrary_command(
        'baseaggregate', counter_command, tenant_client)

    # The scale workflow is given up to 120 (max_wait_for_start) to begin.
    utils.wait_for_execution_on_deployment(
        'scale', 'baseaggregate', tenant_client, max_wait_for_start=120)

    utils.remove_deployment('baseaggregate', tenant_client)
    utils.delete_blueprint('baseaggregate', tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_abort_on_unreachable():
    """Expect an UNKNOWN entry in the aggregate check log with snmpd stopped.

    Works inside a dedicated tenant: deploys nagios from
    nagios-aggregates.yaml, installs baseaggregate, stops snmpd on the first
    base_aggregate_host node instance and then polls the tail of
    /var/log/nagios/check_snmp_aggregate.log for "UNKNOWN" before cleaning
    up.
    """
    tenant_name = 'test_aggregate_abort_on_unreachable'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path(
        'nagios-aggregates.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    aggregate_blueprint = utils.get_examples_blueprint_path(
        'baseaggregate.yaml')
    vm_inputs = utils.get_monitored_vms_inputs(settings)
    utils.install_blueprint(
        aggregate_blueprint, vm_inputs, 'baseaggregate', tenant_client)

    # Stop SNMP on the first instance of the aggregate host node.
    first_host = utils.get_first_node_instance(
        'base_aggregate_host', 'baseaggregate', tenant_client)
    utils.execute_arbitrary_command(
        'baseaggregate', 'sudo service snmpd stop', tenant_client, first_host)

    # Poll up to 60 times, pausing one second after each miss, until grep
    # finds UNKNOWN (exit status 0) in the last lines of the check log.
    log_check = (
        'sudo tail -n10 /var/log/nagios/check_snmp_aggregate.log '
        '| grep UNKNOWN'
    )
    unknown_seen = False
    attempts_left = 60
    while attempts_left and not unknown_seen:
        attempts_left -= 1
        outcome = utils.execute_arbitrary_command(
            'nagios', log_check, tenant_client)
        unknown_seen = outcome['status'] == 0
        if not unknown_seen:
            time.sleep(1)
    assert unknown_seen

    utils.remove_deployment('baseaggregate', tenant_client)
    utils.delete_blueprint('baseaggregate', tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_external_trap_triggers_heal():
    """Expect a 'heal' execution on basetrap after a trap names its node.

    Works inside a dedicated tenant: deploys nagios from nagios-traps.yaml,
    installs basetrap, runs snmptrap against nagios (targeting localhost)
    with a message containing the IP of the first base_trap_host node
    instance, waits for a 'heal' execution on basetrap and then cleans up.
    """
    tenant_name = 'test_external_trap_triggers_heal'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path('nagios-traps.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    trap_blueprint = utils.get_examples_blueprint_path('basetrap.yaml')
    vm_inputs = utils.get_monitored_vms_inputs(settings)
    utils.install_blueprint(
        trap_blueprint, vm_inputs, 'basetrap', tenant_client)

    # Build the trap; the second varbind carries the monitored node's address.
    trap_template = (
        'snmptrap -v2c -c testcommunity {ip} "" {oid} '
        '.1.3.6.1.4.1.52312.0.1.1 s "Test message" '
        '.1.3.6.1.4.1.52312.0.1.2 s "The address is {node_address}"'
    )
    monitored_address = utils.get_first_node_instance_ip(
        node='base_trap_host',
        deployment='basetrap',
        client=tenant_client,
    )
    trap_command = trap_template.format(
        ip='localhost',
        oid='.1.3.6.1.4.1.52312.0.0.2',
        node_address=monitored_address,
    )
    utils.execute_arbitrary_command('nagios', trap_command, tenant_client)

    # The trap is expected to result in a heal workflow.
    utils.wait_for_execution_on_deployment('heal', 'basetrap', tenant_client)

    utils.remove_deployment('basetrap', tenant_client)
    utils.delete_blueprint('basetrap', tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_pre_created_certificate():
    """Check nagios reports and serves the bundled example certificate.

    Works inside a dedicated tenant: deploys nagios from nagios-precert.yaml,
    reads examples/blueprints/ssl/example.crt (relative to the directory
    above this file's directory) and asserts it equals both the
    nagios_ssl_certificate deployment output and the certificate served on
    port 443 of the external address. The address is also passed to
    check_no_insecure_ssl_protos before cleaning up.
    """
    tenant_name = 'test_server_pre_created_certificate'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path(
        'nagios-precert.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    # Load the certificate file that the deployment is compared against.
    this_dir = os.path.dirname(os.path.abspath(__file__))
    parent_dir = os.path.dirname(this_dir)
    cert_path = os.path.join(
        parent_dir, 'examples', 'blueprints', 'ssl', 'example.crt')
    with open(cert_path) as cert_file:
        expected_cert = cert_file.read()

    # The deployment output must carry exactly that certificate.
    nagios_outputs = tenant_client.deployments.outputs.get('nagios')
    outputs = nagios_outputs['outputs']
    assert expected_cert == outputs['nagios_ssl_certificate']

    check_no_insecure_ssl_protos(outputs['external_address'])

    # The nagios httpd service must present the same certificate.
    # PROTOCOL_SSLv23 selects the highest supported SSL/TLS version.
    https_endpoint = (outputs['external_address'], 443)
    server_cert = ssl.get_server_certificate(
        https_endpoint, ssl.PROTOCOL_SSLv23)
    assert expected_cert == server_cert

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_heal_on_threshold_exceeded():
    """Expect a 'heal' execution on basevalue once its test integer is 42.

    Works inside a dedicated tenant: deploys nagios from nagios-values.yaml,
    installs basevalue, writes 42 to /tmp/cloudifytestinteger by running a
    command against basevalue, waits for a 'heal' execution on basevalue and
    then removes what it created.
    """
    tenant_name = 'test_value_breach_heal'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path('nagios-values.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    value_blueprint = utils.get_examples_blueprint_path('basevalue.yaml')
    vm_inputs = utils.get_monitored_vms_inputs(settings)
    utils.install_blueprint(
        value_blueprint, vm_inputs, 'basevalue', tenant_client)

    utils.execute_arbitrary_command(
        'basevalue', 'echo 42 > /tmp/cloudifytestinteger', tenant_client)

    # The new value is expected to result in a heal workflow.
    utils.wait_for_execution_on_deployment('heal', 'basevalue', tenant_client)

    utils.remove_deployment('basevalue', tenant_client)
    utils.delete_blueprint('basevalue', tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_generated_certificate():
    """Check nagios serves the certificate named in its deployment outputs.

    Works inside a dedicated tenant: deploys nagios from nagios-gencert.yaml,
    takes the nagios_ssl_certificate deployment output as the expected
    certificate and asserts that the certificate served on port 443 of the
    external address equals it. The address is also passed to
    check_no_insecure_ssl_protos before cleaning up.
    """
    tenant_name = 'test_server_generated_certificate'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path(
        'nagios-gencert.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    # The deployment outputs provide the certificate to compare against.
    nagios_outputs = tenant_client.deployments.outputs.get('nagios')
    outputs = nagios_outputs['outputs']
    expected_cert = outputs['nagios_ssl_certificate']

    check_no_insecure_ssl_protos(outputs['external_address'])

    # The nagios httpd service must present the same certificate.
    # PROTOCOL_SSLv23 selects the highest supported SSL/TLS version.
    https_endpoint = (outputs['external_address'], 443)
    server_cert = ssl.get_server_certificate(
        https_endpoint, ssl.PROTOCOL_SSLv23)
    assert expected_cert == server_cert

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_trap_triggers_scale_down():
    """Expect a 'scale' execution on basetrap after a trap is sent to nagios.

    Works inside a dedicated tenant: deploys nagios from nagios-traps.yaml,
    installs basetrap, runs the scale workflow on it with a delta of +1 for
    base_trap_host, sends an SNMP trap to the nagios internal IP by running
    snmptrap against basetrap, waits for a 'scale' execution on basetrap and
    then removes what it created.
    """
    tenant_name = 'test_trap_triggers_scale_down'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path('nagios-traps.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    trap_blueprint = utils.get_examples_blueprint_path('basetrap.yaml')
    vm_inputs = utils.get_monitored_vms_inputs(settings)
    utils.install_blueprint(
        trap_blueprint, vm_inputs, 'basetrap', tenant_client)

    # Add one instance first, using the scale workflow.
    scale_up_parameters = {
        'scalable_entity_name': 'base_trap_host',
        'delta': '+1',
        'scale_compute': True,
    }
    utils.execute(
        'basetrap', 'scale', tenant_client, parameters=scale_up_parameters)

    # Send the trap to nagios.
    nagios_ip = utils.get_nagios_internal_ip(tenant_client)
    trap_command = 'snmptrap -v2c -c testcommunity {ip} "" {oid}'.format(
        ip=nagios_ip,
        oid='.1.3.6.1.4.1.52312.0.0.1',
    )
    utils.execute_arbitrary_command('basetrap', trap_command, tenant_client)

    # The trap is expected to result in a scale workflow.
    utils.wait_for_execution_on_deployment('scale', 'basetrap', tenant_client)

    utils.remove_deployment('basetrap', tenant_client)
    utils.delete_blueprint('basetrap', tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
def test_trap_not_triggered_constraints():
    """Expect a "No reaction" log entry after a trap is sent to nagios.

    Works inside a dedicated tenant: deploys nagios from nagios-traps.yaml,
    installs basetrap, sends an SNMP trap to the nagios internal IP by
    running snmptrap against basetrap and then polls the tail of
    /var/log/nagios/notify_cloudify.log for "No reaction" before cleaning
    up.
    """
    tenant_name = 'test_trap_not_triggering_constraints'
    settings = utils.load_config()
    admin_client = utils.get_rest_client_using_config(
        settings, tenant='default_tenant')

    admin_client.tenants.create(tenant_name)
    tenant_client = utils.get_rest_client_using_config(
        settings, tenant=tenant_name)

    utils.upload_config_secrets(settings, tenant_client)
    plugins = utils.upload_required_plugins(tenant_client)

    nagios_blueprint = utils.get_examples_blueprint_path('nagios-traps.yaml')
    nagios_inputs = utils.get_nagios_inputs(settings)
    utils.deploy_nagios(nagios_blueprint, nagios_inputs, tenant_client)

    trap_blueprint = utils.get_examples_blueprint_path('basetrap.yaml')
    vm_inputs = utils.get_monitored_vms_inputs(settings)
    utils.install_blueprint(
        trap_blueprint, vm_inputs, 'basetrap', tenant_client)

    # Send the trap to nagios.
    nagios_ip = utils.get_nagios_internal_ip(tenant_client)
    trap_command = 'snmptrap -v2c -c testcommunity {ip} "" {oid}'.format(
        ip=nagios_ip,
        oid='.1.3.6.1.4.1.52312.0.0.1',
    )
    utils.execute_arbitrary_command('basetrap', trap_command, tenant_client)

    # Poll up to 5 times, pausing one second after each miss, until grep
    # finds "No reaction" (exit status 0) in the last lines of the log.
    log_check = (
        'sudo tail -n10 /var/log/nagios/notify_cloudify.log '
        '| grep "No reaction"'
    )
    no_reaction_seen = False
    attempts_left = 5
    while attempts_left and not no_reaction_seen:
        attempts_left -= 1
        outcome = utils.execute_arbitrary_command(
            'nagios', log_check, tenant_client)
        no_reaction_seen = outcome['status'] == 0
        if not no_reaction_seen:
            time.sleep(1)
    assert no_reaction_seen

    utils.remove_deployment('basetrap', tenant_client)
    utils.delete_blueprint('basetrap', tenant_client)

    utils.remove_nagios(tenant_client)
    utils.delete_plugins(plugins, tenant_client)
    utils.remove_config_secrets(tenant_client)

    admin_client.tenants.delete(tenant_name)
Ejemplo n.º 10
0
def _wait_for_group_aggregate(client, tenant, group_instance, group_type,
                              description):
    """Poll check_group_aggregate on nagios until it stops saying "no checks".

    Until the dependent checks have run, the plugin responds that there are
    "no checks associated", so the command is retried.

    :param client: REST client used to run the command against 'nagios'.
    :param tenant: Tenant name passed to the plugin's --tenant option.
    :param group_instance: Value for the plugin's --group-instance option.
    :param group_type: Value for the plugin's --group-type option.
    :param description: Short name of the check, used in the timeout message.
    :return: The first command output that does not contain "no checks".
    :raises AssertionError: If 10 attempts have all reported "no checks".
    """
    # The command does not change between attempts, so build it once.
    command = (
        'sudo /usr/lib64/nagios/plugins/check_group_aggregate '
        '--approach="arithmetic_mean" --tenant="{tenant}" '
        '--group-instance="{group_instance}" '
        '--unknown="ignore" '
        '--group-type="{group_type}"'
    ).format(
        tenant=tenant,
        group_instance=group_instance,
        group_type=group_type,
    )

    attempt = 0
    output = 'no checks'
    while 'no checks' in output:
        # Wait up to 30 seconds of sleeps in total (this should be about
        # twice as long as is needed)
        assert attempt < 10, 'Timed out waiting for {description} check'.format(
            description=description)
        time.sleep(3)
        output = utils.execute_arbitrary_command(
            'nagios',
            command,
            client,
        )['output']
        attempt += 1
    return output


def test_groups_do_not_collide():
    """Check the cross-deployment counter and value groups stay separate.

    Works inside a dedicated tenant: deploys nagios from
    nagios-groups-nocollide.yaml, installs basegroup-nocollide, then runs
    check_group_aggregate for the "crossdeploymentcountergroup" and
    "crossdeploymentvaluegroup" group instances. The counter result must be
    greater than 100000 and the value result must be 0. Everything created
    is removed afterwards.
    """
    tenant = 'test_groups_do_not_collide'
    config = utils.load_config()
    main_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )

    main_client.tenants.create(tenant)
    client = utils.get_rest_client_using_config(
        config,
        tenant=tenant,
    )

    utils.upload_config_secrets(config, client)
    installed_plugins = utils.upload_required_plugins(client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-groups-nocollide.yaml'),
        utils.get_nagios_inputs(config),
        client,
    )
    utils.install_blueprint(
        utils.get_examples_blueprint_path('basegroup-nocollide.yaml'),
        utils.get_monitored_vms_inputs(config),
        'basegroup-nocollide',
        client,
    )

    counter_result = _wait_for_group_aggregate(
        client,
        tenant,
        group_instance='crossdeploymentcountergroup',
        group_type='Test check group counter',
        description='counter',
    )
    # This should be a reasonably large number
    assert int(get_check_value(counter_result)) > 100000

    value_result = _wait_for_group_aggregate(
        client,
        tenant,
        group_instance='crossdeploymentvaluegroup',
        group_type='Test check group value',
        description='value',
    )
    assert int(get_check_value(value_result)) == 0

    utils.remove_deployment('basegroup-nocollide', client)
    utils.delete_blueprint('basegroup-nocollide', client)

    utils.remove_nagios(client)
    utils.delete_plugins(installed_plugins, client)
    utils.remove_config_secrets(client)

    main_client.tenants.delete(tenant)
def test_adding_and_updating_target_types():
    """Exercise adding, then updating, a target type on a deployed nagios.

    Works inside a dedicated tenant:

    1. Deploy nagios from nagios-update-1.yaml and confirm no node of type
       cloudify.nagios.nodes.TargetType is listed.
    2. Update the nagios deployment to the nagios-update-2.yaml blueprint
       and wait for that update to finish.
    3. Install baseupdate and write 10 to its test integer file, then pause
       for 60 seconds.
    4. Update the nagios deployment to nagios-update-3.yaml (reinstalling
       the first base_update_instance node instance), wait for it, and run
       the reconcile monitoring operation on the nagios node.
    5. Wait for a 'heal' execution on baseupdate, then remove everything
       that was created.
    """
    tenant = 'test_adding_and_updating_target_types'
    config = utils.load_config()
    main_client = utils.get_rest_client_using_config(
        config,
        tenant='default_tenant',
    )

    main_client.tenants.create(tenant)
    client = utils.get_rest_client_using_config(
        config,
        tenant=tenant,
    )

    utils.upload_config_secrets(config, client)
    installed_plugins = utils.upload_required_plugins(client)

    utils.deploy_nagios(
        utils.get_examples_blueprint_path('nagios-update-1.yaml'),
        utils.get_nagios_inputs(config),
        client,
    )

    # Confirm we currently have no target types
    nodes = [item['type'] for item in client.nodes.list(_include=['type'])]
    assert 'cloudify.nagios.nodes.TargetType' not in nodes

    # We don't yet have the target type we need, so let's add it
    client.blueprints.upload(
        path=utils.get_examples_blueprint_path('nagios-update-2.yaml'),
        entity_id='nagiosupdate2',
    )
    first_update = client.deployment_updates.update_with_existing_blueprint(
        deployment_id='nagios',
        blueprint_id='nagiosupdate2',
    )
    # Wait for this update to finish, as is already done for the later
    # update, so the install below cannot race a still-running update.
    utils.wait_for_execution(first_update['execution_id'], client)

    # Now that we have the target type we need, we can install our test node
    utils.install_blueprint(
        utils.get_examples_blueprint_path('baseupdate.yaml'),
        utils.get_monitored_vms_inputs(config),
        'baseupdate',
        client,
    )

    # Now we will set the test integer to return a higher value
    utils.execute_arbitrary_command(
        'baseupdate',
        'echo 10 > /tmp/cloudifytestinteger',
        client,
    )

    # Allow time for the heal to start running if there is a problem
    time.sleep(60)

    # ...and then update the check threshold so that the check can actually run
    client.blueprints.upload(
        path=utils.get_examples_blueprint_path('nagios-update-3.yaml'),
        entity_id='nagiosupdate3',
    )
    update = client.deployment_updates.update_with_existing_blueprint(
        deployment_id='nagios',
        blueprint_id='nagiosupdate3',
        reinstall_list=[utils.get_first_node_instance(
            'base_update_instance',
            'nagios',
            client,
        )],
    )
    utils.wait_for_execution(update['execution_id'], client)

    # Reconcile because updated target types have to be re-created
    utils.execute(
        'nagios',
        'execute_operation',
        client,
        parameters={
            'node_ids': ['nagios'],
            'operation': 'cloudify.interfaces.reconcile.monitoring',
            'allow_kwargs_override': True,
        },
    )

    # Confirm the expected workflow runs
    # If the workflow already ran then it will already have healed so we won't
    # see it run again now.
    utils.wait_for_execution_on_deployment('heal',
                                           'baseupdate',
                                           client)

    utils.remove_deployment('baseupdate', client)
    utils.delete_blueprint('baseupdate', client)

    utils.remove_nagios(client)
    utils.delete_blueprint('nagiosupdate2', client)
    utils.delete_blueprint('nagiosupdate3', client)
    utils.delete_plugins(installed_plugins, client)
    utils.remove_config_secrets(client)

    main_client.tenants.delete(tenant)