Exemple #1
0
def perform_worker_node_scaling(node):
    """Apply the scaling decision for one worker node set via Occopus.

    When the policy outputs name specific replicas to drop, remove them one
    by one while respecting min_instances; otherwise scale the node set to
    the limit-corrected calculated instance count.
    """
    outputs = node.get('outputs', dict())
    current_count = outputs.get('m_node_count')
    drop_candidates = outputs.get('m_nodes_todrop', list())
    if drop_candidates:
        config = pk_config.config()
        for replica in drop_candidates:
            # Never shrink below the configured minimum instance count.
            if current_count > node['min_instances']:
                log.debug('(S) Dropping node {0}...'.format(replica))
                occo.drop_worker_node(endpoint=config['occopus_endpoint'],
                                      infra_name=config['occopus_infra_name'],
                                      worker_name=node['name'],
                                      replica=replica)
                current_count -= 1
        node['outputs']['m_node_count'] = current_count
    elif 'm_node_count' in outputs:
        corrected, nmin, nmax = limit_instances(outputs.get('m_node_count'),
                                                node.get('min_instances'),
                                                node.get('max_instances'))
        log.debug(
            '(S) Scaling values for {0}: min:{1} max:{2} calculated:{3} corrected:{4}'
            .format(node['name'], nmin, nmax,
                    outputs.get('m_node_count', None), corrected))
        config = pk_config.config()
        occo.scale_worker_node(endpoint=config['occopus_endpoint'],
                               infra_name=config['occopus_infra_name'],
                               worker_name=node['name'],
                               replicas=corrected)
def prepare_session(policy_yaml):
  """Parse a policy YAML and prepare all backends for a scaling session.

  Resolves query placeholders in the raw YAML text, configures Prometheus
  (exporters, alert rule files, config reload), then seeds the observed
  instance counts for worker nodes (via Occopus) and for services (via
  Kubernetes). Returns the parsed policy structure.
  """
  global log
  log = logging.getLogger('pk')
  config = pk_config.config()
  log.info('Received policy: \n{0}'.format(policy_yaml))
  # Substitute query expressions in the raw text before YAML parsing.
  policy_yaml = resolve_queries(policy_yaml)
  log.info('Resolved policy: \n{0}'.format(policy_yaml))
  policy = yaml.safe_load(policy_yaml)
  log.info('(C) Add exporters to prometheus configuration file starts')
  config_tpl = config['prometheus_config_template']
  config_target = config['prometheus_config_target']
  prom.add_exporters_to_prometheus_config(policy, config_tpl, config_target)
  log.info('(C) Add alerts to prometheus, generating rule files starts')
  prom.deploy_alerts_under_prometheus(config['prometheus_rules_directory'],
                                      policy.get('data',dict()).get('alerts'),
                                      policy.get('stack','pk'))
  log.info('(C) Notify prometheus to reload config starts')
  prom.notify_to_reload_config(config['prometheus_endpoint'])
  log.info('(C) Querying number of target nodes from Occopus starts')
  # Seed m_node_count for each scalable node with the live count from Occopus.
  for onenode in policy.get('scaling',dict()).get('nodes',[]):
    instances = occo.query_number_of_worker_nodes(
                    endpoint=config['occopus_endpoint'],
                    infra_name=config['occopus_infra_name'],
                    worker_name=onenode['name'])
    log.info('(C) Setting m_node_count for {} to {}'.format(onenode['name'], instances))
    set_worker_node_instance_number(onenode,instances)
  log.info('(C) Querying number of service replicas from Swarm starts')
  # Seed m_container_count for each service with the live replica count
  # (queried from Kubernetes despite the log message mentioning Swarm).
  for theservice in policy.get('scaling',dict()).get('services',[]):
    service_name = theservice.get('name','')
    full_service_name = get_full_service_name(policy, service_name)
    instances = k8s.query_k8s_replicas(config['k8s_endpoint'],full_service_name)
    log.info('(C) Setting m_container_count for {0} to {1}'.format(service_name, instances))
    set_k8s_instance_number(policy,service_name,instances)
  return policy
Exemple #3
0
def collect_inputs_for_nodes(policy, node):
    """Assemble the input-metric dictionary for a single worker node set.

    Records the live node list, the limit-corrected node count and, based
    on the previous session's inputs, when the node count last changed and
    how long ago that was.
    """
    config = pk_config.config()
    inputs = dict()
    inputs['m_nodes'] = k8s.query_list_of_nodes(config['k8s_endpoint'],
                                                node['name'])
    inputs['m_node_count'], _, _ = limit_instances(
        node.get('outputs', dict()).get('m_node_count'),
        node.get('min_instances'), node.get('max_instances'))
    inputs['m_nodes_todrop'] = []

    previous = node.get('inputs', dict())
    prev_count = previous.get('m_node_count', None)
    prev_nodes = previous.get('m_nodes', None)
    counts_agree = inputs['m_node_count'] == len(inputs['m_nodes'])
    if not (prev_count and prev_nodes):
        # No usable history: treat the count as freshly changed now.
        when_changed = int(time.time())
    elif not counts_agree:
        # Requested count and actual node list still disagree: in transition.
        when_changed = 0
    elif prev_count == len(prev_nodes):
        # Stable before and after: keep the previously recorded timestamp.
        when_changed = previous.get('m_time_when_node_count_changed', 0)
    else:
        # Transition just finished: stamp the change with the current time.
        when_changed = int(time.time())
    inputs['m_time_when_node_count_changed'] = when_changed
    if when_changed == 0:
        inputs['m_time_since_node_count_changed'] = 0
    else:
        inputs['m_time_since_node_count_changed'] = int(time.time()) - when_changed

    inputs['m_userdata'] = policy.get('scaling', dict()).get('userdata', None)
    return inputs
Exemple #4
0
def varname_if_input(varname):
  """Return *varname* with the optimizer input prefix removed, or None.

  A None return means the variable is not an optimizer input.
  """
  config = pk_config.config()
  prefix = config.get('optimizer_vars_prefix_input', DEFAULT_prestr_input)
  return varname[len(prefix):] if varname.startswith(prefix) else None
Exemple #5
0
def insert_target_structure(m_opt_init_params,key,value):
  """Record an optimizer target variable parsed from a prefixed policy key.

  *key* may carry one of three target prefixes (query, min threshold or
  max threshold); the suffix after the matching prefix becomes the variable
  name and the corresponding field is stored under
  m_opt_init_params['constants']['target_metrics'], either updating an
  existing entry of the same name or appending a new one. Query-type
  targets are additionally registered in the global m_opt_variables list.
  """
  log=logging.getLogger('pk_optimizer')
  config = pk_config.config()
  prestr_target = config.get('optimizer_vars_prefix_target',DEFAULT_prestr_target)
  prestr_target_query = prestr_target+config.get('optimizer_vars_prefix_target_query',DEFAULT_prestr_target_query)
  varname, fieldname = None, None
  # NOTE(review): the three prefix checks below are not mutually exclusive;
  # if one configured prefix is a prefix of another, the last matching branch
  # wins. Confirm the configured prefixes are disjoint.
  if key.startswith(prestr_target_query):
    varname=key[len(prestr_target_query):]
    fieldname='name'
    m_opt_variables.append(dict(lname=key,sname=varname,query=value))
  prestr_target_minth = prestr_target+config.get('optimizer_vars_prefix_target_minth',DEFAULT_prestr_target_minth)
  if key.startswith(prestr_target_minth):
    varname=key[len(prestr_target_minth):]
    fieldname='min_threshold'
  prestr_target_maxth = prestr_target+config.get('optimizer_vars_prefix_target_maxth',DEFAULT_prestr_target_maxth)
  if key.startswith(prestr_target_maxth):
    varname=key[len(prestr_target_maxth):]
    fieldname='max_threshold'
  if varname and fieldname:
    log.info('(O)   => TARGET: {0}/{1}:{2}'.format(varname,fieldname,value))
    # Update an already-registered target of the same name, if present...
    for atarget in m_opt_init_params['constants']['target_metrics']:
      if atarget['name']==varname:
        if fieldname!='name':
          atarget[fieldname]=value
        return
    # ...otherwise append a brand-new target entry.
    targetdict = dict()
    targetdict[fieldname] = value
    targetdict['name'] = varname
    m_opt_init_params['constants']['target_metrics'].append(targetdict)
  return
Exemple #6
0
def collect_inputs_for_nodes(policy):
    """Build the input-metric dictionary for the Swarm-managed worker nodes.

    Records the ready node list, the clamped node count and, comparing with
    the previous session's inputs, when the node count last changed and how
    long ago that was.
    """
    inputs = dict()
    node = policy.get('scaling', dict()).get('nodes', dict())
    config = pk_config.config()
    inputs['m_nodes'] = dock.query_list_of_ready_nodes(
        config['swarm_endpoint'])
    raw_count = node.get('outputs', dict()).get('m_node_count', None)
    if raw_count:
        # Clamp the calculated count into the [min, max] interval.
        inputs['m_node_count'] = max(min(int(raw_count), int(node['max'])),
                                     int(node['min']))
    else:
        inputs['m_node_count'] = int(node['min'])

    previous = node.get('inputs', dict())
    prev_count = previous.get('m_node_count', None)
    prev_nodes = previous.get('m_nodes', None)
    counts_agree = inputs['m_node_count'] == len(inputs['m_nodes'])
    if not (prev_count and prev_nodes):
        # No usable history: treat the count as freshly changed now.
        when_changed = int(time.time())
    elif not counts_agree:
        # Requested count and actual node list still disagree: in transition.
        when_changed = 0
    elif prev_count == len(prev_nodes):
        # Stable before and after: keep the previously recorded timestamp.
        when_changed = previous.get('m_time_when_node_count_changed', 0)
    else:
        # Transition just finished: stamp the change with the current time.
        when_changed = int(time.time())
    inputs['m_time_when_node_count_changed'] = when_changed
    if when_changed == 0:
        inputs['m_time_since_node_count_changed'] = 0
    else:
        inputs['m_time_since_node_count_changed'] = int(time.time()) - when_changed

    inputs['m_userdata'] = policy.get('scaling', dict()).get('userdata', None)
    return inputs
Exemple #7
0
def collect_init_params_and_variables(policy):
  """Scan the policy for optimizer-related constants, inputs and targets.

  Fills the module-level m_opt_init_params / m_opt_variables structures:
  prefixed entries under data.constants become optimizer init constants,
  prefixed entries under data.queries become input or target metrics, and
  the VM number limits are taken from the node whose scaling rule consults
  the optimizer. No-op when dry-run is enabled for the optimizer.
  """
  log=logging.getLogger('pk_optimizer')
  config = pk_config.config()
  if pk_config.dryrun_get(dryrun_id):
    log.info('(O)   DRYRUN enabled. Skipping...')
    return
  reset_variables()
  m_opt_init_params['constants'] = dict()
  # Constants carrying the init prefix are forwarded to the optimizer.
  for varname,value in policy.get('data',dict()).get('constants',dict()).iteritems():
    retvarname = varname_if_init(varname)
    if retvarname:
      log.info('(O)   => INIT: {0}:{1}'.format(retvarname,value))
      m_opt_init_params['constants'][retvarname]=value
  m_opt_init_params['constants']['input_metrics']=list()
  # Queries carrying the input prefix become optimizer input metrics.
  for varname,query in policy.get('data',dict()).get('queries',dict()).iteritems():
    retvarname = varname_if_input(varname)
    if retvarname:
      log.info('(O)   => INPUT: {0}:{1}'.format(retvarname,query))
      m_opt_init_params['constants']['input_metrics'].append(dict(name=retvarname))
      m_opt_variables.append(dict(lname=varname,sname=retvarname,query=query))
  m_opt_init_params['constants']['target_metrics']=list()
  # Queries carrying the target prefix describe target metrics/thresholds.
  for varname,query in policy.get('data',dict()).get('queries',dict()).iteritems():
    if check_if_target(varname):
      insert_target_structure(m_opt_init_params,varname,query)
  # Take VM limits from the node whose scaling rule uses the optimizer advice.
  for onenode in policy.get('scaling',dict()).get('nodes',[]):
    if 'm_opt_advice' in onenode.get('scaling_rule',''):
      _,omin,omax = limit_instances(None,
                                    onenode.get('min_instances'),
                                    onenode.get('max_instances'))
      m_opt_init_params['constants']['min_vm_number']=omin
      m_opt_init_params['constants']['max_vm_number']=omax
  log.debug('(O) m_opt_init_params (yaml) => {0}'.format(yaml.dump(m_opt_init_params)))
  log.debug('(O) m_opt_variables (yaml) => {0}'.format(yaml.dump(m_opt_variables)))
  return
Exemple #8
0
def collect_inputs_for_containers(policy, service_name):
    """Collect the input metrics for the service named *service_name*.

    Gathers the K8s nodes hosting the service, the limit-corrected node
    count of the last matching host node, and the clamped container count.
    """
    config = pk_config.config()
    inputs = dict()
    scaling = policy.get('scaling', dict())
    nodes = scaling.get('nodes', [])
    inputs['m_nodes'] = []
    mnc, mini, maxi = 0, 0, 0

    for svc in scaling.get('services', []):
        if service_name != svc.get('name', ''):
            continue
        for node in nodes:
            # A service with no 'hosts' entry may run on any node.
            if not svc.get('hosts') or node['name'] in svc.get('hosts', []):
                inputs['m_nodes'] += k8s.query_list_of_nodes(
                    config['k8s_endpoint'], node['name'])
                mnc, mini, maxi = limit_instances(
                    node.get('outputs', dict()).get('m_node_count'),
                    node.get('min_instances'), node.get('max_instances'))
        inputs['m_node_count'] = mnc
        raw = svc.get('outputs', dict()).get('m_container_count', None)
        if raw:
            # Clamp calculated replicas into [min_instances, max_instances].
            inputs['m_container_count'] = max(
                min(int(raw), int(svc['max_instances'])),
                int(svc['min_instances']))
        else:
            inputs['m_container_count'] = int(svc['min_instances'])
    inputs['m_userdata'] = scaling.get('userdata', None)
    return inputs
Exemple #9
0
def perform_one_session(policy, results=None):
    """Run one evaluation/scaling cycle over the worker nodes and services.

    First for the nodes, then for every service in the policy: collect
    inputs, evaluate Prometheus queries and alerts (or use pre-computed
    *results* when given), evaluate the scaling policy and apply the
    scaling decision. Fired alerts are cleared after each phase.
    """
    global log
    log = logging.getLogger('pk')
    config = pk_config.config()

    log.info('--- session starts ---')
    log.info('(I) Collecting inputs for nodes starts')
    inputs = collect_inputs_for_nodes(policy)
    set_policy_inputs_for_nodes(policy, inputs)
    for x in inputs.keys():
        log.info('(I) => "{0}": {1}'.format(x, inputs[x]))
    log.info('(Q) Evaluating queries and alerts for nodes starts')
    # Pre-computed results take precedence over querying Prometheus live.
    if results:
        queries, alerts = add_query_results_and_alerts_to_nodes(
            policy, results)
    else:
        queries, alerts = prom.evaluate_data_queries_and_alerts_for_nodes(
            config['prometheus_endpoint'], policy)
    for attrname, attrvalue in queries.iteritems():
        log.info('(Q) => "{0}" is "{1}".'.format(attrname, attrvalue))
    for attrname, attrvalue in alerts.iteritems():
        log.info('(A) => "{0}" is "{1}".'.format(attrname, attrvalue))
    log.info('(P) Policy evaluation for nodes starts')
    perform_policy_evaluation_on_worker_nodes(policy)
    log.info('(S) Scaling of nodes starts')
    perform_worker_node_scaling(policy)
    # Alerts are one-shot: clear them once they have been acted upon.
    for attrname, attrvalue in alerts.iteritems():
        prom.alerts_remove(attrname)

    for oneservice in policy.get('scaling', dict()).get('services', dict()):
        service_name = oneservice.get('name')
        log.info('(I) Collecting inputs for service "{0}" starts'.format(
            service_name))
        inputs = collect_inputs_for_containers(policy, service_name)
        set_policy_inputs_for_containers(policy, service_name, inputs)
        for x in inputs.keys():
            log.info('(I) => "{0}": {1}'.format(x, inputs[x]))
        log.info('(Q) Evaluating queries and alerts for service "{0}" starts'.
                 format(service_name))
        if results:
            queries, alerts = add_query_results_and_alerts_to_service(
                policy, results, service_name)
        else:
            queries, alerts = prom.evaluate_data_queries_and_alerts_for_a_service(
                config['prometheus_endpoint'], policy, service_name)
        for attrname, attrvalue in queries.iteritems():
            log.info('(Q) => "{0}" is "{1}".'.format(attrname, attrvalue))
        for attrname, attrvalue in alerts.iteritems():
            log.info('(A) => "{0}" is "{1}".'.format(attrname, attrvalue))
        log.info('(P) Policy evaluation for service "{0}" starts'.format(
            service_name))
        perform_policy_evaluation_on_a_docker_service(policy, service_name)
        log.info('(S) Scaling of service "{0}" starts'.format(service_name))
        perform_service_scaling(policy, service_name)
        for attrname, attrvalue in alerts.iteritems():
            prom.alerts_remove(attrname)

    log.info('--- session finished ---')
    return
Exemple #10
0
def prepare_session(policy_yaml):
    """Parse a policy YAML and initialise all backends for a session.

    Sets per-component dry-run flags from the policy constants, configures
    Prometheus (exporters, alert rules, reload), seeds node instance counts
    from the cloud orchestrator and service replica counts from Kubernetes,
    then initialises the optimizer. Returns the parsed policy structure.
    """
    global log
    log = logging.getLogger('pk')
    config = pk_config.config()
    log.info('Received policy: \n{0}'.format(policy_yaml))
    # Substitute query expressions in the raw text before YAML parsing.
    policy_yaml = resolve_queries(policy_yaml)
    log.info('Resolved policy: \n{0}'.format(policy_yaml))
    policy = yaml.safe_load(policy_yaml)
    #Set dryrun flags
    log.info('(C) Initializing dryrun settings from policy starts')
    pk_config.dryrun_set()
    dryrun = policy.get('data', dict()).get('constants',
                                            dict()).get('m_dryrun', None)
    # m_dryrun is expected to be a list of component names to run dry.
    if type(dryrun) == list:
        for comp in dryrun:
            if comp in pk_config.var_dryrun_components:
                pk_config.dryrun_set(comp, True)
    log.info('(C) Enable dryrun for the following components: {0}'.format(
        pk_config.dryrun_get()))
    #Initialize Prometheus
    log.info('(C) Add exporters to prometheus configuration file starts')
    config_tpl = config['prometheus_config_template']
    config_target = config['prometheus_config_target']
    prom.add_exporters_to_prometheus_config(policy, config_tpl, config_target)
    log.info('(C) Add alerts to prometheus, generating rule files starts')
    prom.deploy_alerts_under_prometheus(
        config['prometheus_rules_directory'],
        policy.get('data', dict()).get('alerts'), policy.get('stack', 'pk'))
    log.info('(C) Notify prometheus to reload config starts')
    prom.notify_to_reload_config(config['prometheus_endpoint'])
    #Initialise nodes through Occopus
    log.info(
        '(C) Querying number of target nodes from Cloud Orchestrator starts')
    #policy.setdefault('scaling', dict())["cloud_orchestrator"] = get
    for onenode in policy.get('scaling', dict()).get('nodes', []):
        cloud = get_cloud_orchestrator(onenode)
        instances = cloud.query_number_of_worker_nodes(
            config, worker_name=onenode['name'])
        log.info('(C) Setting m_node_count for {} to {}'.format(
            onenode['name'], instances))
        set_worker_node_instance_number(onenode, instances)
    #Initialise service through K8S
    log.info('(C) Querying number of service replicas from K8s starts')
    for theservice in policy.get('scaling', dict()).get('services', []):
        service_name = theservice.get('name', '')
        full_service_name = get_full_service_name(policy, service_name)
        instances = k8s.query_k8s_replicas(config['k8s_endpoint'],
                                           full_service_name)
        log.info('(C)   Setting m_container_count for {0} to {1}'.format(
            service_name, instances))
        set_k8s_instance_number(policy, service_name, instances)
    #Initialise Optimizer
    log.info('(O) Scanning the optimizer parameters starts...')
    optim.collect_init_params_and_variables(policy)
    log.info('(O) Initializing optimizer starts...')
    optim.calling_rest_api_init()
    return policy
def perform_service_scaling(policy,service_name):
  """Scale the K8s deployment of *service_name* to its corrected replica count.

  Reads the calculated 'm_container_count' output of the matching service
  entry, clamps it into [min_instances, max_instances] and instructs K8s.
  """
  for srv in policy['scaling']['services']:
    if 'm_container_count' in srv.get('outputs',dict()) and srv['name']==service_name:
        log.debug('(S) Scaling values for service "{0}": min:{1} max:{2} calculated:{3}'
                  .format(srv['name'],srv['min_instances'],srv['max_instances'],srv['outputs']['m_container_count']))
        containercount = max(min(int(srv['outputs']['m_container_count']),int(srv['max_instances'])),int(srv['min_instances']))
        # Keep the stack-qualified name in a separate variable: the original
        # code overwrote the service_name parameter here, which corrupted the
        # srv['name']==service_name match test on subsequent loop iterations.
        full_name = get_full_service_name(policy, srv['name'])
        config = pk_config.config()
        k8s.scale_k8s_deploy(config['k8s_endpoint'],full_name,containercount)
Exemple #12
0
def calling_rest_api_advice():
  """Fetch a scaling advice from the optimizer's REST API.

  Returns the canned dummy advice when dry-run is enabled or the optimizer
  is not accessible; otherwise returns the JSON-decoded response.
  """
  log = logging.getLogger('pk_optimizer')
  if pk_config.dryrun_get(dryrun_id) or not m_opt_accessible:
    return m_opt_dummy_advice
  config = pk_config.config()
  url = config.get('optimizer_endpoint')+'/advice'
  log.debug('(O) Calling optimizer REST API advice() method: '+url)
  advice = requests.get(url).json()
  log.debug('(O) Response: {0}'.format(advice))
  return advice
def perform_worker_node_scaling(node):
  """Scale the worker node via Occopus to its limit-corrected instance count."""
  if 'm_node_count' not in node.get('outputs',dict()):
    return
  log.debug('(S) Scaling values for {3}: min:{0} max:{1} calculated:{2}'
           .format(node['min_instances'],node['max_instances'],node['outputs']['m_node_count'],node['name']))
  # Clamp the calculated count into the [min_instances, max_instances] range.
  target = int(node['outputs']['m_node_count'])
  target = min(target, int(node['max_instances']))
  target = max(target, int(node['min_instances']))
  config = pk_config.config()
  occo.scale_worker_node(
      endpoint=config['occopus_endpoint'],
      infra_name=config['occopus_infra_name'],
      worker_name=node['name'],
      replicas=target)
Exemple #14
0
def calling_rest_api_sample(sample=None):
  """POST a metric *sample* (a dict) to the optimizer's REST API.

  Skips silently when dry-run is enabled or the optimizer is not
  accessible. The default was a mutable dict() shared between calls; it is
  now None, replaced here by a fresh empty dict (backward compatible).
  """
  log=logging.getLogger('pk_optimizer')
  config = pk_config.config()
  if sample is None:
    sample = dict()
  if pk_config.dryrun_get(dryrun_id):
    log.info('(O)   DRYRUN enabled. Skipping...')
    return
  if not m_opt_accessible:
    return
  url = config.get('optimizer_endpoint')+'/sample'
  log.debug('(O) Calling optimizer REST API sample() method: '+url)
  response = requests.post(url, data=yaml.dump(sample))
  log.debug('(O) Response: '+str(response))
  return
def stop(policy_yaml):
  """Tear down the Prometheus artefacts created for *policy_yaml*.

  Removes the policy's exporters from the Prometheus configuration,
  deletes the generated alert rule files and asks Prometheus to reload
  its configuration.
  """
  global log
  log = logging.getLogger('pk')
  config = pk_config.config()
  policy = yaml.safe_load(policy_yaml)
  log.info('(C) Remove exporters from prometheus configuration file starts')
  prom.remove_exporters_from_prometheus_config(config['prometheus_config_template'],
                                               config['prometheus_config_target'])
  log.info('(C) Remove alerts from prometheus, deleting rule files starts')
  prom.remove_alerts_under_prometheus(config['prometheus_rules_directory'],
                                      policy.get('data',dict()).get('alerts',dict()),
                                      policy.get('stack','pk'))
  log.info('(C) Notify prometheus to reload config starts')
  prom.notify_to_reload_config(config['prometheus_endpoint'])
def collect_inputs_for_containers(policy,service_name):
  """Gather the input metrics for *service_name* in a Swarm deployment.

  Records the Swarm node list, the clamped worker node count and the
  clamped container count of the matching service entry.
  """
  config = pk_config.config()
  scaling = policy.get('scaling',dict())
  node = scaling.get('nodes',dict())
  inputs = dict()
  inputs['m_nodes'] = dock.query_list_of_nodes(config['swarm_endpoint'])
  raw_nodes = node.get('outputs',dict()).get('m_node_count',None)
  if raw_nodes:
    # Clamp calculated node count into [min_instances, max_instances].
    inputs['m_node_count'] = max(min(int(raw_nodes),int(node['max_instances'])),int(node['min_instances']))
  else:
    inputs['m_node_count'] = int(node['min_instances'])
  for svc in scaling.get('services',dict()):
    if svc.get('name','') != service_name:
      continue
    raw_containers = svc.get('outputs',dict()).get('m_container_count',None)
    if raw_containers:
      # Clamp calculated container count into its own instance limits.
      inputs['m_container_count'] = max(min(int(raw_containers),int(svc['max_instances'])),int(svc['min_instances']))
    else:
      inputs['m_container_count'] = int(svc['min_instances'])
  inputs['m_userdata'] = scaling.get('userdata',None)
  return inputs
Exemple #17
0
def perform_worker_node_scaling(policy):
    """Scale the Occopus-managed worker nodes to the clamped calculated count."""
    node = policy['scaling']['nodes']
    if 'm_node_count' not in node.get('outputs', dict()):
        return
    log.debug(
        '(S) Scaling values for worker node: min:{0} max:{1} calculated:{2}'
        .format(node['min'], node['max'], node['outputs']['m_node_count']))
    # Clamp the calculated count into the [min, max] interval.
    target = int(node['outputs']['m_node_count'])
    target = max(min(target, int(node['max'])), int(node['min']))
    config = pk_config.config()
    occo.scale_occopus_worker_node(
        endpoint=config['occopus_endpoint'],
        infra_name=config['occopus_infra_name'],
        worker_name=config['occopus_worker_name'],
        replicas=target)
Exemple #18
0
def perform_service_scaling(policy, service_name):
    """Scale the docker service *service_name* to its corrected replica count.

    Reads the calculated 'm_container_count' of the matching service entry,
    clamps it into [min, max] and instructs Docker Swarm to scale the
    stack-qualified service.
    """
    for srv in policy['scaling']['services']:
        if 'm_container_count' in srv.get(
                'outputs', dict()) and srv['name'] == service_name:
            log.debug(
                '(S) Scaling values for service "{0}": min:{1} max:{2} calculated:{3}'
                .format(srv['name'], srv['min'], srv['max'],
                        srv['outputs']['m_container_count']))
            containercount = max(
                min(int(srv['outputs']['m_container_count']), int(srv['max'])),
                int(srv['min']))
            # Keep the stack-qualified name in a separate variable: the
            # original code overwrote the service_name parameter, which
            # corrupted the name match test on subsequent loop iterations.
            if policy.get('stack', '') not in [None, '']:
                full_name = '{0}_{1}'.format(policy['stack'], srv['name'])
            else:
                full_name = '{0}'.format(srv['name'])
            config = pk_config.config()
            dock.scale_docker_service(config['swarm_endpoint'], full_name,
                                      containercount)
Exemple #19
0
def calling_rest_api_init():
  """Initialise the optimizer over its REST API.

  Sets the module-level m_opt_accessible flag according to whether the
  init call succeeded; on failure the optimizer is disabled for the
  current policy instead of aborting. No-op in dry-run mode.
  """
  global m_opt_accessible
  log = logging.getLogger('pk_optimizer')
  config = pk_config.config()
  if pk_config.dryrun_get(dryrun_id):
    log.info('(O)   DRYRUN enabled. Skipping...')
    return
  url = config.get('optimizer_endpoint')+'/init'
  log.debug('(O) Calling optimizer REST API init() method: '+url)
  try:
    response = requests.post(url, data=yaml.dump(m_opt_init_params))
  except Exception:
    # Optimizer unreachable: disable it for this policy and carry on.
    m_opt_accessible = False
    log.exception('(O) Calling optimizer REST API init() method raised exception: ')
    log.info('(O) WARNING: Optimizer is disabled for the current policy.')
    return
  m_opt_accessible = True
  log.debug('(O) Response: '+str(response))
  return
Exemple #20
0
def prepare_session(policy_yaml):
    """Parse *policy_yaml* and set up Prometheus for a scaling session.

    Configures exporters, attaches Prometheus to the exporters' network,
    deploys the alert rule files and triggers a configuration reload.
    Returns the parsed policy structure.
    """
    global log
    log = logging.getLogger('pk')
    config = pk_config.config()
    log.info('Received policy: \n{0}'.format(policy_yaml))
    policy = yaml.safe_load(policy_yaml)
    # NOTE(review): resolve_queries is applied to the parsed structure here
    # (presumably mutating it in place), unlike the YAML-string variant used
    # elsewhere — confirm against its definition.
    resolve_queries(policy)
    log.info('(C) Add exporters to prometheus configuration file starts')
    prom.add_exporters_to_prometheus_config(
        policy, config['prometheus_config_template'],
        config['prometheus_config_target'])
    log.info('(C) Attach prometheus to network of exporters starts')
    prom.attach_prometheus_to_exporters_network(policy,
                                                config['swarm_endpoint'])
    log.info('(C) Add alerts to prometheus, generating rule files starts')
    prom.deploy_alerts_under_prometheus(
        config['prometheus_rules_directory'],
        policy.get('data', dict()).get('alerts'), policy.get('stack', 'pk'))
    log.info('(C) Notify prometheus to reload config starts')
    prom.notify_to_reload_config(config['prometheus_endpoint'])
    return policy
Exemple #21
0
def perform_one_session(policy, results=None):
    """Run one evaluation/scaling cycle over every node and service.

    Performs worker node maintenance, then for each node and each service:
    collect inputs, evaluate Prometheus queries and alerts (or use the
    pre-computed *results*), optionally feed a sample to the optimizer,
    evaluate the scaling policy and apply the scaling decision. Fired
    alerts are cleared after each phase.
    """
    global log
    log = logging.getLogger('pk')
    config = pk_config.config()
    log.info('--- session starts ---')
    log.info('(M) Maintaining worker nodes starts')
    # Handle nodes that have been unreachable for longer than the timeout.
    k8s.down_nodes_maintenance(config['k8s_endpoint'],
                               config['docker_node_unreachable_timeout'])
    for onenode in policy.get('scaling', dict()).get('nodes', []):
        node_name = onenode.get('name')
        log.info('(I) Collecting inputs for node {} starts'.format(node_name))
        inputs = collect_inputs_for_nodes(policy, onenode)
        set_policy_inputs_for_nodes(policy, inputs, onenode)
        for x in inputs.keys():
            log.info('(I)   => "{0}": {1}'.format(x, inputs[x]))
        log.info('(Q) Evaluating queries and alerts for node {} starts'.format(
            node_name))
        # Pre-computed results take precedence over querying Prometheus live.
        if results:
            queries, alerts = add_query_results_and_alerts_to_nodes(
                policy, results, onenode)
        else:
            queries, alerts = prom.evaluate_data_queries_and_alerts_for_nodes(
                config['prometheus_endpoint'], policy, onenode)
        for attrname, attrvalue in queries.iteritems():
            log.info('(Q)   => "{0}" is "{1}".'.format(attrname, attrvalue))
        for attrname, attrvalue in alerts.iteritems():
            log.info('(A)   => "{0}" is "{1}".'.format(attrname, attrvalue))

        # Nodes governed by the optimizer also feed it a metric sample.
        if 'm_opt_advice' in onenode.get('scaling_rule', ''):
            log.info('(O) Creating sample for the optimizer starts')
            sample = optim.generate_sample(queries, inputs)
            log.info('(O) Sending sample for the optimizer starts')
            optim.calling_rest_api_sample(sample)

        log.info('(P) Policy evaluation for nodes starts')
        perform_policy_evaluation_on_worker_nodes(policy, onenode)
        log.info('(S) Scaling of nodes starts')
        perform_worker_node_scaling(onenode)
        # Alerts are one-shot: clear them once they have been acted upon.
        for attrname, attrvalue in alerts.iteritems():
            prom.alerts_remove(attrname)
    for oneservice in policy.get('scaling', dict()).get('services', []):
        service_name = oneservice.get('name')
        log.info('(I) Collecting inputs for service "{0}" starts'.format(
            service_name))
        inputs = collect_inputs_for_containers(policy, service_name)
        set_policy_inputs_for_containers(policy, service_name, inputs)
        for x in inputs.keys():
            log.info('(I)   => "{0}": {1}'.format(x, inputs[x]))
        log.info('(Q) Evaluating queries and alerts for service "{0}" starts'.
                 format(service_name))
        if results:
            queries, alerts = add_query_results_and_alerts_to_service(
                policy, results, service_name)
        else:
            queries, alerts = prom.evaluate_data_queries_and_alerts_for_a_service(
                config['prometheus_endpoint'], policy, service_name)
        for attrname, attrvalue in queries.iteritems():
            log.info('(Q)   => "{0}" is "{1}".'.format(attrname, attrvalue))
        for attrname, attrvalue in alerts.iteritems():
            log.info('(A)   => "{0}" is "{1}".'.format(attrname, attrvalue))
        log.info('(P) Policy evaluation for service "{0}" starts'.format(
            service_name))
        perform_policy_evaluation_on_a_k8s_deploy(policy, service_name)
        log.info('(S) Scaling of service "{0}" starts'.format(service_name))
        perform_service_scaling(policy, service_name)
        for attrname, attrvalue in alerts.iteritems():
            prom.alerts_remove(attrname)

    log.info('--- session finished ---')
    return
Exemple #22
0
def check_if_target(varname):
  """Return True when *varname* carries the optimizer target-variable prefix."""
  prefix = pk_config.config().get('optimizer_vars_prefix_target', DEFAULT_prestr_target)
  return varname.startswith(prefix)
Exemple #23
0
def pkmain():
    """Entry point: parse arguments, load configuration and logging, then
    run either in standalone (command-line) or web-service mode.

    Fix: the original printed an error when the configuration file could
    not be read or processed but carried on, crashing later on an unusable
    config / undefined log — now it exits with status 1 instead.
    """
    global log
    parser = argparse.ArgumentParser(
        description='MiCADO component to realise scaling policies')
    parser.add_argument('--cfg',
                        dest='cfg_path',
                        default='./config.yaml',
                        help='path to configuration file')
    parser.add_argument('--policy',
                        dest='cfg_policy',
                        help='specifies the policy to execute')
    parser.add_argument('--srv',
                        action='store_true',
                        dest='cfg_srv',
                        default=False,
                        help='run in service mode')
    parser.add_argument('--host',
                        type=str,
                        default='127.0.0.1',
                        help='host to bind service to')
    parser.add_argument('--port',
                        type=int,
                        default=12345,
                        help='port to bind service to')
    args = parser.parse_args()
    #read configuration
    try:
        with open(args.cfg_path, 'r') as c:
            pk_config.config(yaml.safe_load(c))
    except Exception as e:
        print 'ERROR: Cannot read configuration file "{0}": {1}'.format(
            args.cfg_path, str(e))
        # Without a configuration there is nothing meaningful to do.
        sys.exit(1)
    config = pk_config.config()
    #initialise logging facility based on the configuration
    try:
        logging.config.dictConfig(config['logging'])
        log = logging.getLogger('pk')
    except Exception as e:
        print 'ERROR: Cannot process configuration file "{0}": {1}'.format(
            args.cfg_path, str(e))
        # 'log' would be undefined below; abort instead of crashing later.
        sys.exit(1)
    #read policy file and start periodic policy evaluation in case of command-line mode
    if not args.cfg_srv:
        if not args.cfg_policy:
            log.error(
                'Policy file must be specified for standalone execution!')
            sys.exit(1)
        try:
            policy_yaml = load_policy_from_file(args.cfg_policy)
            start(policy_yaml)
        except KeyboardInterrupt:
            log.warning('Keyboard interruption detected! Shutting down...')
            stop(policy_yaml)
        except Exception:
            log.exception('An error occurred during policy execution:')
            return

    #launch web service and wait for oncoming requests
    if args.cfg_srv:
        if args.cfg_policy:
            log.warning(
                'Policy file in parameter is unused, must be defined through the API in service mode!'
            )
        pk_rest.init_logging()
        evaluator.init_logging()
        pk_rest.app.run(debug=True, host=args.host, port=args.port)