Example #1
def query_service_network(endpoint, stack_name, service_name):
    id = None
    log = logging.getLogger('pk_docker')
    client = docker.DockerClient(base_url=endpoint)
    full_service_name = stack_name + "_" + service_name
    if pk_config.simulate():
        return None
    service_list = client.services.list()
    i = 0
    while i < len(service_list) and service_list[i].name != full_service_name:
        i += 1
    if i < len(service_list) and service_list[i].name == full_service_name:
        if len(service_list[i].attrs.get("Spec").get("TaskTemplate").get(
                "Networks")) == 1:
            id = service_list[i].attrs.get("Spec").get("TaskTemplate").get(
                "Networks")[0].get("Target")
            log.debug(
                'Docker service "{0}" in stack "{1}" is connected to network "{2}" with id "{3}".'
                .format(service_name, stack_name,
                        client.networks.get(id).name, str(id)))
        else:
            log.warning(
                'Docker service "{0}" is connected to more than one network.'.
                format(full_service_name))
    else:
        log.warning('Docker service "{0}" is not found in stack "{1}".'.format(
            service_name, stack_name))
    return id
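A minimal usage sketch (the endpoint, stack and service names below are illustrative, not from the source): the returned network id is typically handed to the detach helper that appears later in this collection.

# Sketch only: assumes the docker handler module above is imported as "dock";
# endpoint, stack and service names are illustrative.
endpoint = 'unix://var/run/docker.sock'
net_id = dock.query_service_network(endpoint, 'mystack', 'node-exporter')
if net_id:
    dock.detach_container_from_network(endpoint, 'prometheus', net_id)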
Example #2
def query_list_of_nodes(endpoint, worker_name='micado-worker', status='ready'):
    log = logging.getLogger('pk_k8s')
    list_of_nodes = []
    if pk_config.simulate():
        return dict()
    kubernetes.config.load_kube_config()
    client = kubernetes.client.CoreV1Api()
    try:
        nodes = []
        if status == 'ready':
            nodes = [x for x in client.list_node().items if not x.spec.taints]
            nodes = [
                x for x in nodes
                if x.metadata.labels.get('micado.eu/node_type') == worker_name
            ]
        elif status == 'down':
            nodes = [
                x for x in client.list_node().items
                if x.spec.taints and 'master' not in x.spec.taints[0].key
            ]
        for n in nodes:
            a = {}
            a['ID'] = n.metadata.name
            a['Addr'] = n.status.addresses[0].address
            list_of_nodes.append(a.copy())
        return list_of_nodes
    except Exception as e:
        log.exception('(Q) Query of k8s nodes failed.')
        return dict()
Example #3
def scale_occopus_worker_node(endpoint, infra_name, worker_name, replicas):
    log = logging.getLogger('pk_occopus')
    log.info('(S) => m_node_count: {0}'.format(replicas))
    wscall = '{0}/infrastructures/{1}/scaleto/{2}/{3}'.format(
        endpoint, infra_name, worker_name, replicas)
    log.debug('-->curl -X POST {0}'.format(wscall))
    if not pk_config.simulate():
        response = requests.post(wscall).json()
        log.debug('-->response: {0}'.format(response))
    return
Example #4
def notify_to_reload_config(endpoint):
    log = logging.getLogger('pk_prometheus')
    try:
        if not pk_config.simulate():
            requests.post(endpoint + "/-/reload")
        log.info('(C) Notification to reload config sent to Prometheus.')
    except Exception:
        log.exception(
            'Sending config reload notification to Prometheus failed:')
Example #5
def remove_node(endpoint, id):
    log = logging.getLogger('pk_docker')
    if pk_config.simulate():
        return
    try:
        client = docker.APIClient(endpoint)
        client.remove_node(id, True)
    except Exception:
        log.error('(M) => Removing docker node failed.')
    return
Example #6
def remove_alerts_under_prometheus(rules_directory, alerts, stack):
    if not alerts:
        return
    log = logging.getLogger('pk_prometheus')
    try:
        rule_file = os.path.join(rules_directory, stack + '.rules')
        if not pk_config.simulate():
            os.remove(rule_file)
    except Exception:
        log.exception('Removing alerts under Prometheus failed:')
    return
Example #7
def remove_node(endpoint, id):
    log = logging.getLogger('pk_k8s')
    if pk_config.simulate():
        return
    kubernetes.config.load_kube_config()
    client = kubernetes.client.CoreV1Api()
    try:
        client.delete_node(id, {})
    except Exception:
        log.error('(M) => Removing k8s node failed.')
    return
Example #8
def query_number_of_worker_nodes(endpoint, infra_name, worker_name):
    log = logging.getLogger('pk_occopus')
    instances = 1
    wscall = '{0}/infrastructures/{1}'.format(endpoint, infra_name)
    log.debug('-->curl -X GET {0}'.format(wscall))
    if not pk_config.simulate():
        response = requests.get(wscall).json()
        instances = response.get(worker_name,
                                 dict()).get('scaling',
                                             dict()).get('target', 0)
        log.debug('-->instances: {0}, response: {1}'.format(
            instances, response))
    return instances
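The chained .get() calls above imply a response of roughly the following shape; the values are made up and the real Occopus reply may carry additional fields.

# Illustrative shape of the Occopus GET /infrastructures/<infra_name> reply
# that query_number_of_worker_nodes navigates (values are invented):
response = {
    'worker': {                                   # keyed by worker/node name
        'scaling': {'target': 3, 'actual': 3},    # desired vs. current count
        'instances': ['node-1', 'node-2', 'node-3']
    }
}
response.get('worker', dict()).get('scaling', dict()).get('target', 0)   # -> 3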
Example #9
def scale_k8s_deploy(endpoint, service_name, replicas):
    service_name = '-'.join(service_name.split('_')[1:])
    log = logging.getLogger('pk_k8s')
    log.info('(S) => m_container_count: {0}'.format(replicas))
    if pk_config.simulate():
        return
    kubernetes.config.load_kube_config()
    client = kubernetes.client.ExtensionsV1beta1Api()
    try:
        dep = client.read_namespaced_deployment(service_name, "default")
        dep.spec.replicas = replicas
        client.patch_namespaced_deployment_scale(service_name, "default", dep)
    except Exception as e:
        log.warning('(S) Scaling of k8s service "{0}" failed: {1}'.format(
            service_name, str(e)))
    return
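The snippet above relies on ExtensionsV1beta1Api, which newer kubernetes Python clients no longer ship; a minimal sketch of the same scaling step against AppsV1Api (the namespace and patch body are assumptions, not taken from the source):

import kubernetes

def scale_k8s_deploy_appsv1(service_name, replicas, namespace='default'):
    # Sketch only: same idea as scale_k8s_deploy, using the AppsV1 API that
    # replaced ExtensionsV1beta1 for Deployments in newer client versions.
    kubernetes.config.load_kube_config()
    apps = kubernetes.client.AppsV1Api()
    body = {'spec': {'replicas': replicas}}       # patch body for the scale subresource
    apps.patch_namespaced_deployment_scale(service_name, namespace, body)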
Example #10
def query_k8s_replicas(endpoint, service_name):
    service_name = '-'.join(service_name.split('_')[1:])
    log = logging.getLogger('pk_k8s')
    instance = 1
    if pk_config.simulate():
        return
    kubernetes.config.load_kube_config()
    client = kubernetes.client.ExtensionsV1beta1Api()
    try:
        dep = client.read_namespaced_deployment(service_name, "default")
        instance = dep.spec.replicas
        log.debug('(C) => m_container_count for {0}: {1}'.format(
            service_name, instance))
    except Exception as e:
        log.warning('(C) Querying k8s service "{0}" replicas failed: {1}'.format(
            service_name, str(e)))
    return instance
Example #11
def deploy_alerts_under_prometheus(rules_directory, alerts, stack):
    if not alerts:
        return
    log = logging.getLogger('pk_prometheus')
    try:
        content = {'groups': [{'name': 'micado', 'rules': []}]}
        for alert in alerts:
            content['groups'][0]['rules'].append(dict(alert))
        rule_file = os.path.join(rules_directory, stack + '.rules')
        if not pk_config.simulate():
            with open(rule_file, 'w') as outfile:
                yaml.round_trip_dump(content,
                                     outfile,
                                     default_flow_style=False)
    except Exception:
        log.exception('Deploying alerts under Prometheus failed:')
    return
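Each element of the 'alerts' list is dumped unchanged into a standard Prometheus rule file; a hypothetical alert entry and the resulting <stack>.rules content could look like this (name, expression and threshold are illustrative):

# Hypothetical alert entry as it might appear in policy['data']['alerts']:
alert = {'alert': 'service_overloaded',
         'expr': 'avg(rate(container_cpu_usage_seconds_total[1m])) > 0.8',
         'for': '1m'}
# yaml.round_trip_dump then writes a regular Prometheus rule file, e.g.:
# groups:
# - name: micado
#   rules:
#   - alert: service_overloaded
#     expr: avg(rate(container_cpu_usage_seconds_total[1m])) > 0.8
#     for: 1m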
Example #12
def query_docker_service_replicas(endpoint, service_name):
    log = logging.getLogger('pk_docker')
    instance = 1
    if pk_config.simulate():
        return
    client = docker.APIClient(endpoint)
    try:
        response = client.inspect_service(service_name)
        instance = response.get('Spec', dict()).get('Mode', dict()).get(
            'Replicated', dict()).get('Replicas', 1)
        log.debug('(C) => m_container_count for {0}: {1}'.format(
            service_name, instance))
    except Exception as e:
        log.warning(
            '(C) Querying docker service "{0}" replicas failed: {1}'.format(
                service_name, str(e)))
    return instance
Example #13
def evaluate_data_queries_and_alerts_for_a_service(endpoint, policy,
                                                   servicename):
    log = logging.getLogger('pk_prometheus')
    queries, alerts = dict(), dict()
    if 'query_results' not in policy['data']:
        policy['data']['query_results'] = dict()
    all_services = policy.get('scaling', dict()).get('services', dict())
    target_service = [
        srv for srv in all_services if srv.get('name', '') == servicename
    ]
    scaling_rule_str = target_service[0].get('scaling_rule',
                                             '') if target_service else ''
    for param, query in policy.get('data', dict()).get('queries',
                                                       dict()).items():
        try:
            if scaling_rule_str is not None and scaling_rule_str.find(
                    param) != -1:
                if pk_config.simulate():
                    continue
                response = requests.get(endpoint + "/api/v1/query?query=" +
                                        query).json()
                log.debug('Prometheus response query "{0}":{1}'.format(
                    query, response))
                val = extract_value_from_prometheus_response(
                    query, response, dict())
                policy['data']['query_results'][param] = float(val)
                queries[param] = float(val)
        except Exception as e:
            policy['data']['query_results'][param] = None
            queries[param] = None
            log.warning(
                'Evaluating expression for query "{0}" failed: {1}'.format(
                    param, str(e)))
    policy['data']['alert_results'] = {}
    for item in policy.get('data', dict()).get('alerts', dict()):
        attrname = item['alert']
        if scaling_rule_str is not None and scaling_rule_str.find(
                attrname) != -1:
            if alerts_query(attrname) is not None:
                policy['data']['alert_results'][attrname] = True
                alerts[attrname] = True
            else:
                policy['data']['alert_results'][attrname] = False
                alerts[attrname] = False
    return queries, alerts
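extract_value_from_prometheus_response is project code that is not part of this example; a minimal sketch of what it has to do against the standard Prometheus /api/v1/query JSON, assuming a single instant-vector result (the real helper may handle scalars, multiple series and error statuses):

def extract_value_from_prometheus_response(query, response, details):
    # Sketch only. A typical instant-vector answer looks like:
    # {"status": "success",
    #  "data": {"resultType": "vector",
    #           "result": [{"metric": {...}, "value": [1623412345.6, "0.42"]}]}}
    result = response['data']['result']
    return result[0]['value'][1]      # Prometheus returns the sample value as a string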
Example #14
def query_list_of_nodes(endpoint, status='ready'):
    log = logging.getLogger('pk_docker')
    list_of_nodes = []
    if pk_config.simulate():
        return None
    client = docker.APIClient(endpoint)
    try:
        nodes = client.nodes(filters={'role': 'worker'})
        for n in nodes:
            if n.get('Status', dict()).get('State', '') == status:
                a = {}
                a['ID'] = n.get('ID', 'undefID')
                a['Addr'] = n.get('Status', dict()).get('Addr', '')
                list_of_nodes.append(a.copy())
        return list_of_nodes
    except Exception as e:
        log.exception('(Q) Query of docker nodes failed.')
        return None
Example #15
def scale_docker_service(endpoint, service_name, replicas):
    log = logging.getLogger('pk_docker')
    log.info('(S) => m_container_count: {0}'.format(replicas))
    if pk_config.simulate():
        return
    client = docker.APIClient(endpoint)
    try:
        version = client.inspect_service(service_name)['Version']['Index']
        ret = client.update_service(
            service_name,
            version,
            mode={'Replicated': {
                'Replicas': replicas
            }},
            fetch_current_spec=True)
    except Exception as e:
        log.warning('(S) Scaling of docker service "{0}" failed: {1}'.format(
            service_name, str(e)))
    return
Example #16
def detach_prometheus_from_exporters_network(policy, swarm_endpoint):
    log = logging.getLogger('pk_prometheus')
    for exporter_endpoint in policy.get('data', dict()).get('sources', dict()):
        try:
            exporter_name = exporter_endpoint.split(':')[0]
            if '.' not in exporter_name:
                log.info(
                    '(C) => detaching prometheus from network of exporter "{0}"'
                    .format(exporter_endpoint))
                if pk_config.simulate():
                    continue
                exporter_netid = dock.query_service_network(
                    swarm_endpoint, policy['stack'], exporter_name)
                if exporter_netid:
                    dock.detach_container_from_network(swarm_endpoint,
                                                       'prometheus',
                                                       exporter_netid)
        except Exception as e:
            log.exception(
                'Detaching prometheus from network of exporter failed:')
Example #17
def remove_exporters_from_prometheus_config(template_file, config_file):
    if not pk_config.simulate():
        shutil.copyfile(template_file, config_file)
Example #18
def add_exporters_to_prometheus_config(policy, template_file, config_file):
    log = logging.getLogger('pk_prometheus')
    try:
        config_content = dict()
        if not pk_config.simulate():
            with open(template_file, 'r') as f:
                config_content = yaml.round_trip_load(f)
        if 'scrape_configs' not in config_content:
            config_content['scrape_configs'] = []
        #Find proper scrape_config or create
        scrape_config = [
            x for x in config_content['scrape_configs']
            if x.get('job_name', '') == 'micado' and 'static_configs' in x
        ]
        if not scrape_config:
            config_content['scrape_configs'].append({
                'job_name': 'micado',
                'static_configs': []
            })
            scrape_config = [
                x for x in config_content['scrape_configs']
                if x.get('job_name', '') == 'micado' and 'static_configs' in x
            ][0]
        else:
            scrape_config = scrape_config[0]
        #Find proper static_config or create
        static_config = [
            x for x in scrape_config['static_configs']
            if 'targets' in x.keys()
        ]
        if not static_config:
            scrape_config['static_configs'].append({'targets': []})
            static_config = [
                x for x in scrape_config['static_configs']
                if 'targets' in x.keys()
            ][0]
        else:
            static_config = static_config[0]

        config_changed = False
        for exporter_endpoint in policy.get('data',
                                            dict()).get('sources', dict()):
            if exporter_endpoint not in static_config['targets']:
                static_config['targets'].append(exporter_endpoint)
                config_changed = True
                log.info('(C) => exporter "{0}" added to config'.format(
                    exporter_endpoint))
            else:
                log.info(
                    '(C) => exporter "{0}" skipped, already part of config'.
                    format(exporter_endpoint))

        if config_changed and not pk_config.simulate():
            with open(config_file, 'w') as outfile:
                yaml.round_trip_dump(config_content,
                                     outfile,
                                     default_flow_style=False)

    except Exception as e:
        log.exception('Adding exporters to prometheus config failed:')

    return
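For reference, after one exporter has been added the config_content written back by round_trip_dump corresponds to a Prometheus configuration fragment of roughly this shape (the exporter endpoint is illustrative):

# Illustrative result of add_exporters_to_prometheus_config for one exporter:
config_content = {
    'scrape_configs': [
        {'job_name': 'micado',
         'static_configs': [
             {'targets': ['node-exporter.mystack:9100']}
         ]}
    ]
}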
Example #19
def pkmain():
    global log
    parser = argparse.ArgumentParser(
        description='MiCADO component to realise scaling policies')
    parser.add_argument('--cfg',
                        dest='cfg_path',
                        default='./config.yaml',
                        help='path to configuration file')
    parser.add_argument('--policy',
                        dest='cfg_policy',
                        help='specifies the policy to execute')
    parser.add_argument('--srv',
                        action='store_true',
                        dest='cfg_srv',
                        default=False,
                        help='run in service mode')
    parser.add_argument('--simulate',
                        action='store_true',
                        dest='cfg_simulate',
                        default=False,
                        help='omit manipulating surrounding components')
    args = parser.parse_args()
    #read configuration
    try:
        with open(args.cfg_path, 'r') as c:
            pk_config.config(yaml.safe_load(c))
    except Exception as e:
        print('ERROR: Cannot read configuration file "{0}": {1}'.format(
            args.cfg_path, str(e)))
    config = pk_config.config()
    #initialise logging facility based on the configuration
    try:
        logging.config.dictConfig(config['logging'])
        log = logging.getLogger('pk')
    except Exception as e:
        print('ERROR: Cannot process configuration file "{0}": {1}'.format(
            args.cfg_path, str(e)))
    #set simulate mode
    pk_config.simulate(args.cfg_simulate)
    if args.cfg_simulate:
        log.warning('SIMULATION mode is active! No changes will be performed.')
    #read policy file and start periodic policy evaluation in case of command-line mode
    if not args.cfg_srv:
        if not args.cfg_policy:
            log.error(
                'Policy file must be specified for standalone execution!')
            sys.exit(1)
        try:
            policy_yaml = load_policy_from_file(args.cfg_policy)
            start(policy_yaml)
        except KeyboardInterrupt:
            log.warning('Keyboard interruption detected! Shutting down...')
            stop(policy_yaml)
        except Exception:
            log.exception('An error occurred during policy execution:')
            return

    #launch web service and wait for oncoming requests
    if args.cfg_srv:
        if args.cfg_policy:
            log.warning(
                'Policy file parameter is unused; in service mode the policy must be defined through the API!'
            )
        pk_rest.init_service()
        pk_rest.app.run(debug=True, host='0.0.0.0', port=12345)
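pkmain only requires that the file passed via --cfg contains a 'logging' section consumable by logging.config.dictConfig; a minimal illustrative config.yaml is sketched below (any further keys are project specific and not implied by this example).

# Minimal illustrative config.yaml for the --cfg option:
#
# logging:
#   version: 1
#   formatters:
#     simple:
#       format: '%(asctime)s %(levelname)s %(name)s %(message)s'
#   handlers:
#     console:
#       class: logging.StreamHandler
#       formatter: simple
#   root:
#     level: DEBUG
#     handlers: [console]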
Example #20
def add_exporters_to_prometheus_config(policy, template_file, config_file):
    log = logging.getLogger('pk_prometheus')
    try:
        config_content = dict()
        if not pk_config.simulate():
            shutil.copy(config_file, template_file)
            with open(template_file, 'r') as f:
                config_content = yaml.round_trip_load(f)
        if 'scrape_configs' not in config_content:
            config_content['scrape_configs'] = []
        #Find proper scrape_config or create
        scrape_config = [
            x for x in config_content['scrape_configs']
            if x.get('job_name', '') == 'micado' and 'static_configs' in x
        ]
        if not scrape_config:
            config_content['scrape_configs'].append({
                'job_name': 'micado',
                'static_configs': []
            })
            scrape_config = [
                x for x in config_content['scrape_configs']
                if x.get('job_name', '') == 'micado' and 'static_configs' in x
            ][0]
        else:
            scrape_config = scrape_config[0]
        #Find proper static_config or create
        static_config = [
            x for x in scrape_config['static_configs']
            if 'targets' in x.keys()
        ]
        if not static_config:
            scrape_config['static_configs'].append({'targets': []})
            static_config = [
                x for x in scrape_config['static_configs']
                if 'targets' in x.keys()
            ][0]
        else:
            static_config = static_config[0]

        config_changed = False
        for exporter_endpoint in policy.get('data',
                                            dict()).get('sources', dict()):
            if exporter_endpoint not in static_config['targets']:
                exp = exporter_endpoint.split(':')
                if len(exp) == 1:
                    continue
                elif '.' not in exp[0]:
                    kube_job = [
                        x for x in config_content['scrape_configs']
                        if x.get('job_name') == 'kube-services'
                    ]
                    if not kube_job:
                        continue
                    relabel = kube_job[0].setdefault('relabel_configs', [])
                    old_label = [
                        x for x in relabel if x.get('action') == 'keep'
                    ]
                    if old_label:
                        old_label = old_label[0]
                        old_regex = old_label.get('regex')
                        new_regex = '{}|{}:{}'.format(old_regex, exp[0],
                                                      exp[1])
                        old_label['regex'] = new_regex
                    else:
                        label = {
                            'source_labels': ['endpoint'],
                            'action': 'keep',
                            'regex': '(^a)|{}:{}'.format(exp[0], exp[1])
                        }
                        relabel.append(label)
                else:
                    static_config['targets'].append(exporter_endpoint)
                config_changed = True
                log.info('(C) => exporter "{0}" added to config'.format(
                    exporter_endpoint))
            else:
                log.info(
                    '(C) => exporter "{0}" skipped, already part of config'.
                    format(exporter_endpoint))

        if config_changed and not pk_config.simulate():
            with open(config_file, 'w') as outfile:
                yaml.round_trip_dump(config_content,
                                     outfile,
                                     default_flow_style=False)

    except Exception as e:
        log.exception('Adding exporters to prometheus config failed:')

    return