def main():
    """Drive one alarm through create, list, patch, state change and delete.

    All work is done through ``cli_wrapper``, ``utils`` and ``output_metrics``.
    The states the alarm is expected to pass through are recorded as the test
    advances and are compared with the alarm history after the alarm has
    been deleted.

    Returns:
        int: 0 when every step succeeded, 1 as soon as one check fails.
    """
    utils.setup_cli()

    name = 'alarm_crud'
    metric = 'alarm_crud'
    base_dim = 'service=alarm_test'
    base_expr = 'max(%s{%s}) > 0' % (metric, base_dim)

    # Start without a leftover alarm of the same name.
    cli_wrapper.delete_alarm_if_exists(name)

    # Create the alarm.
    created_id = cli_wrapper.create_alarm(
        name, base_expr, description=name + ' Description')
    print('Created Alarm with id %s' % created_id)

    # The new alarm has to start out UNDETERMINED.
    first_state = 'UNDETERMINED'
    if not utils.check_alarm_state(created_id, first_state):
        return 1
    expected_history = [first_state]

    # The alarm must be found by name and carry the id we were given.
    listed = cli_wrapper.find_alarm_by_name(name)
    if listed is None:
        print('Did not find alarm named %s using alarm-list' % name,
              file=sys.stderr)
        return 1
    listed_id = listed['id']
    if created_id != listed_id:
        print('Alarm %s has wrong id, expected %s but was %s' %
              (name, created_id, listed_id), file=sys.stderr)
        return 1

    # Send the metric and wait for the transition to ALARM.
    if not output_metrics(created_id, 'ALARM', [[metric, base_dim]]):
        return 1
    expected_history.append('ALARM')

    # Extend the expression with a second sub expression so the alarm
    # is expected to go to OK.
    print('Modify Alarm expression so it will go to OK')
    second_metric = 'other_metric'
    second_dim = 'dim=42'
    compound_expr = '%s and max(%s{%s}) > 100' % (
        base_expr, second_metric, second_dim)
    patched = cli_wrapper.patch_alarm(
        created_id, '--expression', compound_expr)
    patched_expr = patched['expression']
    if patched_expr != compound_expr:
        print('Did not change expression to %s instead was %s' %
              (compound_expr, patched_expr), file=sys.stderr)
        return 1

    # Send both metrics and wait for the transition to OK.
    if not output_metrics(
            created_id, 'OK',
            [[metric, base_dim], [second_metric, second_dim]]):
        return 1
    expected_history.append('OK')

    # Restore the original expression so the alarm goes to ALARM again.
    print('Delete Alarm sub expression so it will go to ALARM')
    cli_wrapper.patch_alarm(created_id, '--expression', base_expr)

    # Metrics carrying an additional dimension must still match.
    print('Output extra dimensions to make sure match occurs')
    wider_dim = base_dim + ',Extra=More'
    if not output_metrics(created_id, 'ALARM', [[metric, wider_dim]]):
        return 1
    expected_history.append('ALARM')

    # Force the state to OK by hand; the metrics sent afterwards are
    # expected to push the alarm back to ALARM.
    print('Set Alarm to OK, wait for transition back to ALARM')
    cli_wrapper.change_alarm_state(created_id, 'OK')
    expected_history.append('OK')

    if not output_metrics(
            created_id, 'ALARM',
            [[metric, base_dim], [second_metric, second_dim]]):
        return 1
    expected_history.append('ALARM')

    # Delete the alarm and make sure it can no longer be found by name.
    print('Delete alarm')
    cli_wrapper.run_mon_cli(['alarm-delete', created_id], useJson=False)
    if cli_wrapper.find_alarm_by_name(name) is not None:
        print('Still found alarm %s after it was deleted' % name,
              file=sys.stderr)
        return 1

    # The history is checked after the deletion on purpose.
    if not utils.check_alarm_history(created_id, expected_history):
        return 1

    return 0
Beispiel #2
0
def smoke_test():
    """Run the end-to-end smoke test described by the module-level ``config``.

    The test creates a notification and an alarm definition through
    ``cli_wrapper``, waits for the resulting alarm to go from UNDETERMINED
    to ALARM, sets it to OK by hand and, when at least one command line
    argument was given, waits for it to return to ALARM.  Afterwards it
    checks that new metrics arrived, that the alarm history and the
    notifications match the recorded states, and that the number of statsd
    metrics grew.

    ``count_metrics`` can return ``None``; every result is checked for that
    before it is compared or used in arithmetic.

    Returns:
        tuple: ``(True, '')`` on success, otherwise ``(False, message)``
        where ``message`` describes the first check that failed.
    """
    notification_name = config['notification']['name']
    notification_addr = config['notification']['addr']
    notification_type = config['notification']['type']
    alarm_definition_name = config['alarm']['name']
    metric_name = config['metric']['name']
    metric_dimensions = config['metric']['dimensions']
    statsd_metric_name = config['statsd_metric']['name']
    statsd_metric_dimensions = config['statsd_metric']['dimensions']

    cleanup(notification_name, alarm_definition_name)

    # Query how many metrics there are for the Alarm.
    # The start time is taken in UTC and marked with 'Z' so that the one
    # hour window does not depend on the local timezone of this host.
    hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
    hour_ago_str = hour_ago.strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
    print('Getting metrics for {}{} '.format(metric_name, metric_dimensions))
    initial_num_metrics = count_metrics(metric_name, metric_dimensions,
                                        hour_ago_str)

    if initial_num_metrics is None or initial_num_metrics == 0:
        msg = ('No metric {} with dimensions {} received in last hour'.format(
            metric_name, metric_dimensions))
        return False, msg

    print('Getting metrics for {}{} '.format(statsd_metric_name,
                                             statsd_metric_dimensions))
    initial_statsd_num_metrics = count_metrics(statsd_metric_name,
                                               statsd_metric_dimensions,
                                               hour_ago_str)

    # statsd metrics may not have been sent yet, which will return None
    # from the CLI wrapper
    if initial_statsd_num_metrics is None:
        initial_statsd_num_metrics = 0

    start_time = time.time()

    # Create Notification through CLI
    notif_id = cli_wrapper.create_notification(notification_name,
                                               notification_addr,
                                               notification_type)

    # Create Alarm through CLI
    expression = config['alarm']['expression']
    description = config['alarm']['description']
    alarm_def_id = cli_wrapper.create_alarm_definition(
        alarm_definition_name,
        expression,
        description=description,
        ok_notif_id=notif_id,
        alarm_notif_id=notif_id,
        undetermined_notif_id=notif_id)

    # Wait for an alarm to be created
    alarm_id = wait_for_alarm_creation(alarm_def_id)

    if alarm_id is None:
        # Report whether metrics kept arriving, to help diagnose the failure.
        received_num_metrics = count_metrics(metric_name, metric_dimensions,
                                             hour_ago_str)
        if received_num_metrics is None:
            # Without a count there is nothing to subtract from.
            print('Unable to count {}{} metrics while waiting'.format(
                metric_name, metric_dimensions))
        elif received_num_metrics == initial_num_metrics:
            print('Did not receive any {}{} metrics while waiting'.format(
                metric_name, metric_dimensions))
        else:
            delta = received_num_metrics - initial_num_metrics
            print('Received {} {} metrics while waiting'.format(
                delta, metric_name))
        return False, 'Alarm creation error'

    # Ensure it is created in the right state
    initial_state = 'UNDETERMINED'
    if not utils.check_alarm_state(alarm_id, initial_state):
        msg = 'Alarm is in an invalid initial state'
        return False, msg
    states = [initial_state]

    state = wait_for_alarm_state_change(alarm_id, initial_state)
    if state is None:
        msg = 'Alarm is in an invalid state'
        return False, msg

    if state != 'ALARM':
        print('Wrong final state, expected ALARM but was {}'.format(state),
              file=sys.stderr)
        msg = 'Alarm is in an invalid final state'
        return False, msg
    states.append(state)

    # The expected state is recorded before the change is attempted.
    new_state = 'OK'
    states.append(new_state)
    if not cli_wrapper.change_alarm_state(alarm_id, new_state):
        msg = 'Unable to change Alarm state'
        return False, msg

    # There is a bug in the API which allows this to work. Soon that
    # will be fixed and this will fail
    if len(sys.argv) > 1:
        final_state = 'ALARM'
        states.append(final_state)

        state = wait_for_alarm_state_change(alarm_id, new_state)
        if state is None:
            msg = 'Alarm is in an unknown state'
            return False, msg

        if state != final_state:
            msg = ('Wrong final state, expected {} but was {}'.format(
                final_state, state))
            return False, msg

    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    ensure_at_least(time.time() - start_time, 35)
    change_time = time.time() - start_time

    final_num_metrics = count_metrics(metric_name, metric_dimensions,
                                      hour_ago_str)
    # A missing count is treated the same as "no new metrics".
    if final_num_metrics is None or final_num_metrics <= initial_num_metrics:
        msg = ('No new metrics received for {}{} in {} seconds'.format(
            metric_name, metric_dimensions, change_time))
        return False, msg
    print('Received {} metrics in {} seconds'.format(
        (final_num_metrics - initial_num_metrics), change_time))
    if not utils.check_alarm_history(alarm_id, states):
        msg = 'Invalid alarm history'
        return False, msg

    # Notifications are only sent out for the changes, so omit the first state
    if not check_notifications(alarm_id, states[1:]):
        msg = 'Could not find correct notifications for alarm {}'.format(
            alarm_id)
        return False, msg

    # Check that monasca statsd is sending metrics
    # Metrics may take some time to arrive
    print('Waiting for statsd metrics')
    max_attempts = 30
    for attempt in range(max_attempts):
        final_statsd_num_metrics = count_metrics(statsd_metric_name,
                                                 statsd_metric_dimensions,
                                                 hour_ago_str)
        # None means nothing could be counted yet; keep polling.
        if (final_statsd_num_metrics is not None and
                final_statsd_num_metrics > initial_statsd_num_metrics):
            break
        if attempt >= max_attempts - 1:
            msg = ('No metrics received for statsd metric {}{} in {} '
                   'seconds'.format(statsd_metric_name,
                                    statsd_metric_dimensions,
                                    time.time() - start_time))
            return False, msg
        time.sleep(1)
    print('Received {0} metrics for {1}{2} in {3} seconds'.format(
        final_statsd_num_metrics - initial_statsd_num_metrics,
        statsd_metric_name, statsd_metric_dimensions,
        time.time() - start_time))

    msg = ''
    return True, msg
Beispiel #3
0
def smoke_test():
    """Run the end-to-end smoke test described by the module-level ``config``.

    The test creates a notification and an alarm definition through
    ``cli_wrapper``, waits for the resulting alarm to go from UNDETERMINED
    to ALARM, sets it to OK by hand and waits for it to return to ALARM.
    Afterwards it checks that new metrics arrived, that the alarm history
    and the notifications match the recorded states, and that the number of
    statsd metrics grew.

    ``count_metrics`` can return ``None``; every result is checked for that
    before it is compared or used in arithmetic.

    Returns:
        tuple: ``(True, '')`` on success, otherwise ``(False, message)``
        where ``message`` describes the first check that failed.
    """
    notification_name = config['notification']['name']
    notification_addr = config['notification']['addr']
    notification_type = config['notification']['type']
    alarm_definition_name = config['alarm']['name']
    metric_name = config['metric']['name']
    metric_dimensions = config['metric']['dimensions']
    statsd_metric_name = config['statsd_metric']['name']
    statsd_metric_dimensions = config['statsd_metric']['dimensions']

    cleanup(notification_name, alarm_definition_name)

    # Query how many metrics there are for the Alarm
    hour_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
    hour_ago_str = hour_ago.strftime('%Y-%m-%dT%H:%M:%S') + 'Z'
    print('Getting metrics for {}{} '.format(metric_name, metric_dimensions))
    initial_num_metrics = count_metrics(metric_name, metric_dimensions,
                                        hour_ago_str)

    if initial_num_metrics is None or initial_num_metrics == 0:
        msg = ('No metric {} with dimensions {} received in last hour'.format(
               metric_name, metric_dimensions))
        return False, msg

    print('Getting metrics for {}{} '.format(statsd_metric_name,
                                             statsd_metric_dimensions))
    initial_statsd_num_metrics = count_metrics(statsd_metric_name,
                                               statsd_metric_dimensions,
                                               hour_ago_str)

    # statsd metrics may not have been sent yet, which will return None
    # from the CLI wrapper
    if initial_statsd_num_metrics is None:
        initial_statsd_num_metrics = 0

    start_time = time.time()

    # Create Notification through CLI
    notif_id = cli_wrapper.create_notification(notification_name,
                                               notification_addr,
                                               notification_type)

    # Create Alarm through CLI
    expression = config['alarm']['expression']
    description = config['alarm']['description']
    alarm_def_id = cli_wrapper.create_alarm_definition(
        alarm_definition_name,
        expression,
        description=description,
        ok_notif_id=notif_id,
        alarm_notif_id=notif_id,
        undetermined_notif_id=notif_id)

    # Wait for an alarm to be created
    alarm_id = wait_for_alarm_creation(alarm_def_id)

    if alarm_id is None:
        # Report whether metrics kept arriving, to help diagnose the failure.
        received_num_metrics = count_metrics(metric_name, metric_dimensions,
                                             hour_ago_str)
        if received_num_metrics is None:
            # Without a count there is nothing to subtract from.
            print('Unable to count {}{} metrics while waiting'.format(
                metric_name, metric_dimensions))
        elif received_num_metrics == initial_num_metrics:
            print('Did not receive any {}{} metrics while waiting'.format(
                metric_name, metric_dimensions))
        else:
            delta = received_num_metrics - initial_num_metrics
            print('Received {} {} metrics while waiting'.format(
                delta, metric_name))
        return False, 'Alarm creation error'

    # Ensure it is created in the right state
    initial_state = 'UNDETERMINED'
    if not utils.check_alarm_state(alarm_id, initial_state):
        msg = 'Alarm is in an invalid initial state'
        return False, msg
    states = [initial_state]

    state = wait_for_alarm_state_change(alarm_id, initial_state)
    if state is None:
        msg = 'Alarm is in an invalid state'
        return False, msg

    if state != 'ALARM':
        print('Wrong final state, expected ALARM but was {}'.format(state),
              file=sys.stderr)
        msg = 'Alarm is in an invalid final state'
        return False, msg
    states.append(state)

    # The expected state is recorded before the change is attempted.
    new_state = 'OK'
    states.append(new_state)
    if not cli_wrapper.change_alarm_state(alarm_id, new_state):
        msg = 'Unable to change Alarm state'
        return False, msg

    # After the manual change the alarm is expected to go back to ALARM.
    final_state = 'ALARM'
    states.append(final_state)

    state = wait_for_alarm_state_change(alarm_id, new_state)
    if state is None:
        msg = 'Alarm is in an unknown state'
        return False, msg

    if state != final_state:
        msg = ('Wrong final state, expected {} but was {}'.format(
            final_state, state))
        return False, msg

    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    ensure_at_least(time.time() - start_time, 35)
    change_time = time.time() - start_time

    final_num_metrics = count_metrics(metric_name, metric_dimensions,
                                      hour_ago_str)
    # A missing count is treated the same as "no new metrics".
    if final_num_metrics is None or final_num_metrics <= initial_num_metrics:
        msg = ('No new metrics received for {}{} in {} seconds'.format(
            metric_name, metric_dimensions, change_time))
        return False, msg
    print('Received {} metrics in {} seconds'.format(
        (final_num_metrics - initial_num_metrics), change_time))
    if not utils.check_alarm_history(alarm_id, states):
        msg = 'Invalid alarm history'
        return False, msg

    # Notifications are only sent out for the changes, so omit the first state
    if not check_notifications(alarm_id, states[1:]):
        msg = 'Could not find correct notifications for alarm {}'.format(
            alarm_id)
        return False, msg

    # Check that monasca statsd is sending metrics
    # Metrics may take some time to arrive
    print('Waiting for statsd metrics')
    max_attempts = 30
    for attempt in range(max_attempts):
        final_statsd_num_metrics = count_metrics(statsd_metric_name,
                                                 statsd_metric_dimensions,
                                                 hour_ago_str)
        # None means nothing could be counted yet; keep polling.
        if (final_statsd_num_metrics is not None and
                final_statsd_num_metrics > initial_statsd_num_metrics):
            break
        if attempt >= max_attempts - 1:
            msg = ('No metrics received for statsd metric {}{} in {} '
                   'seconds'.format(statsd_metric_name,
                                    statsd_metric_dimensions,
                                    time.time() - start_time))
            return False, msg
        time.sleep(1)
    print('Received {0} metrics for {1}{2} in {3} seconds'.format(
        final_statsd_num_metrics - initial_statsd_num_metrics,
        statsd_metric_name, statsd_metric_dimensions,
        time.time() - start_time))

    msg = ''
    return True, msg
Beispiel #4
0
def main():
    """Run the smoke test against the local host and report via exit code.

    The test creates an email notification and an alarm through
    ``cli_wrapper``, waits for the alarm to go from UNDETERMINED to ALARM,
    sets it to OK by hand and, when at least one command line argument was
    given, waits for it to return to ALARM.  Afterwards it checks that new
    metrics arrived and that the alarm history and the notifications match
    the recorded states.

    Returns:
        int: 0 when every step succeeded, 1 as soon as one check fails.
    """
    if not utils.ensure_has_notification_engine():
        return 1

    mail_host = 'localhost'
    # universal_newlines=True makes check_output return text rather than
    # bytes on Python 3, so the host name can be concatenated into the
    # alarm expression string below.
    metric_host = subprocess.check_output(['hostname', '-f'],
                                          universal_newlines=True).strip()

    utils.setup_cli()

    notification_name = 'Jahmon Smoke Test'
    notification_email_addr = 'root@' + mail_host
    alarm_name = 'high cpu and load'
    metric_name = 'load_avg_1_min'
    metric_dimensions = {'hostname': metric_host}
    cleanup(notification_name, alarm_name)

    # Query how many metrics there are for the Alarm
    initial_num_metrics = count_metrics(metric_name, metric_dimensions)
    if initial_num_metrics is None:
        return 1

    start_time = time.time()

    # Create Notification through CLI
    notification_id = cli_wrapper.create_notification(notification_name,
                                                      notification_email_addr)
    # Create Alarm through CLI
    # NOTE(review): the description mentions thresholds of 1% and 3 while
    # the expression compares against 0 -- confirm which one is intended.
    expression = 'max(cpu_system_perc) > 0 and ' + \
                 'max(load_avg_1_min{hostname=' + metric_host + '}) > 0'
    description = 'System CPU Utilization exceeds 1% and ' + \
                  'Load exceeds 3 per measurement period'
    alarm_id = cli_wrapper.create_alarm(alarm_name, expression,
                                        description=description,
                                        ok_notif_id=notification_id,
                                        alarm_notif_id=notification_id,
                                        undetermined_notif_id=notification_id)
    # Ensure it is created in the right state
    initial_state = 'UNDETERMINED'
    if not utils.check_alarm_state(alarm_id, initial_state):
        return 1
    states = [initial_state]

    state = wait_for_alarm_state_change(alarm_id, initial_state)
    if state is None:
        return 1

    if state != 'ALARM':
        print('Wrong final state, expected ALARM but was %s' % state,
              file=sys.stderr)
        return 1
    states.append(state)

    new_state = 'OK'
    states.append(new_state)
    cli_wrapper.change_alarm_state(alarm_id, new_state)
    # There is a bug in the API which allows this to work. Soon that
    # will be fixed and this will fail
    if len(sys.argv) > 1:
        final_state = 'ALARM'
        states.append(final_state)

        state = wait_for_alarm_state_change(alarm_id, new_state)
        if state is None:
            return 1

        if state != final_state:
            print('Wrong final state, expected %s but was %s' %
                  (final_state, state), file=sys.stderr)
            return 1

    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    ensure_at_least(time.time() - start_time, 35)
    change_time = time.time() - start_time

    final_num_metrics = count_metrics(metric_name, metric_dimensions)
    # count_metrics can return None (see the check on the initial count);
    # a missing count is treated the same as "no new metrics".
    if final_num_metrics is None or final_num_metrics <= initial_num_metrics:
        print('No new metrics received in %d seconds' % change_time,
              file=sys.stderr)
        return 1
    print('Received %d metrics in %d seconds' %
          ((final_num_metrics - initial_num_metrics), change_time))
    if not utils.check_alarm_history(alarm_id, states):
        return 1

    # Notifications are only sent out for the changes, so omit the first state
    if not check_notifications(alarm_id, states[1:]):
        return 1

    return 0