def bill_start_events(self, client, log_server):
    start_billing_query = SearchQuery(search_range=self.sr,
                                      query='phase_event:done AND so_phase:provision')
    try:
        start_results = log_server.search(start_billing_query)
        LOG.debug('Number of start billing events found: ' + str(len(start_results.messages)))
        for start_event in start_results.messages:
            rcb_message = {}
            start_message = json.loads(start_event.message)
            rcb_message['service_type'] = start_message.get('sm_name', 'none')
            rcb_message['instance_id'] = start_message.get('so_id', 'none')
            rcb_message['tenant_id'] = start_message.get('tenant', 'mcntub')
            rcb_message['status'] = 'start'
            LOG.debug('Sending start billing event to RCB: ' + repr(rcb_message))
            promise = client.basic_publish(exchange='mcn', routing_key='events',
                                           body=json.dumps(rcb_message))
            client.wait(promise)
    except Exception as e:
        LOG.error('Cannot issue query to the log service to extract start events.')
        raise e
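# NOTE: run() below also calls bill_stop_events(), which is not part of this excerpt.
# The following is a minimal sketch of a plausible counterpart, assuming that
# deprovision events are logged with so_phase:deprovision and that the RCB expects
# status 'stop' (see the stop bill event described in run()'s docstring). The query
# string and field names here are assumptions, not confirmed by this code.
def bill_stop_events(self, client, log_server):
    stop_billing_query = SearchQuery(search_range=self.sr,
                                     query='phase_event:done AND so_phase:deprovision')
    try:
        stop_results = log_server.search(stop_billing_query)
        LOG.debug('Number of stop billing events found: ' + str(len(stop_results.messages)))
        for stop_event in stop_results.messages:
            stop_message = json.loads(stop_event.message)
            rcb_message = {
                'service_type': stop_message.get('sm_name', 'none'),
                'instance_id': stop_message.get('so_id', 'none'),
                'tenant_id': stop_message.get('tenant', 'mcntub'),
                'status': 'stop',
            }
            LOG.debug('Sending stop billing event to RCB: ' + repr(rcb_message))
            promise = client.basic_publish(exchange='mcn', routing_key='events',
                                           body=json.dumps(rcb_message))
            client.wait(promise)
    except Exception as e:
        LOG.error('Cannot issue query to the log service to extract stop events.')
        raise e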
def setup_connections(self, amqp_url):
    LOG.debug('Setting up graylog API...')
    attempts = 20
    log_server = GraylogAPI(self.logserver_url, self.logserver_port,
                            self.logserver_user, self.logserver_pass)
    client = puka.Client(amqp_url)
    try:
        time.sleep(15)  # let the system settle :-)
        LOG.debug('AMQP connection to: ' + amqp_url)
        promise = client.connect()
        client.wait(promise)
    except Exception:
        LOG.error('Cannot connect to the RCB message bus. Retrying...')
        while attempts > 0:
            LOG.debug('Sleeping for 10 secs')
            time.sleep(10)
            LOG.debug('AMQP connection to: ' + amqp_url)
            try:
                promise = client.connect()
                client.wait(promise)
                break  # connected successfully - stop retrying
            except Exception:
                attempts = attempts - 1
        else:
            # only reached when the retry loop exhausts all attempts without a break
            client.close()
            LOG.error('Giving up attempting to connect to the AMQP bus after number of attempts: ' + str(attempts))
            raise RuntimeError('Giving up attempting to connect to the AMQP bus after number of attempts: ' + str(attempts))
    self.setup_amqp(amqp_url, client)
    return client, log_server
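# NOTE: setup_amqp() is called above but not included in this excerpt. A minimal
# sketch of what it could look like with puka, assuming the 'mcn' exchange and the
# 'events' routing key used by bill_start_events(); the exchange type, durability
# and queue name are assumptions, not confirmed by this code.
def setup_amqp(self, amqp_url, client):
    LOG.debug('Declaring AMQP exchange and queue on: ' + amqp_url)
    promise = client.exchange_declare(exchange='mcn', type='topic', durable=True)
    client.wait(promise)
    promise = client.queue_declare(queue='events', durable=True)
    client.wait(promise)
    promise = client.queue_bind(queue='events', exchange='mcn', routing_key='events')
    client.wait(promise)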
def run(self):
    """
    This logic does not need to run with the RCB SO and can be run elsewhere.
    Decision part implementation goes here.

    Requires the logging service (hardcoded to log.cloudcomplab.ch here).
    We poll the logging server for events:
      * query for all services that have provision events since the start of this SO
      * query for all services that have destroy events since the start of this SO
    and construct messages to send to the AMQP service.

    start bill event:
        {
            "service_type": "dnsaas",
            "instance_id": "sodnsa97979879879",
            "tenant_id": "mcntub",
            "status": "start"
        }

    stop bill event:
        {
            "service_type": "dnsaas",
            "instance_id": "sodnsa97979879879",
            "tenant_id": "mcntub",
            "status": "stop"
        }
    """
    LOG.debug('Waiting for deploy and provisioning to finish')
    self.event.wait()
    LOG.debug('Starting runtime logic...')

    _, _, stack_output = self.so_e.state()
    attributes = {}
    for kv in stack_output:
        attributes[kv['output_key']] = kv['output_value']

    amqp_url = ''  # e.g. "amqp://*****:*****@messaging.demonstrator.info"
    if 'mcn.endpoint.rcb.mq' in attributes:
        # TODO return the username and password in the heat response
        # XXX username and password is hardcoded!
        amqp_url = 'amqp://*****:*****@' + attributes['mcn.endpoint.rcb.mq']
    else:
        LOG.error('mcn.endpoint.rcb.mq is not present in the stack output. amqp_url=' + amqp_url)
        raise RuntimeError('mcn.endpoint.rcb.mq is not present in the stack output. amqp_url=' + amqp_url)

    client, log_server = self.setup_connections(amqp_url)

    while not self.destroy_event.is_set():
        # TODO separate threads
        LOG.debug('Executing billing run...')
        self.bill_start_events(client, log_server)
        self.bill_stop_events(client, log_server)
        self.destroy_event.wait(self.sleepy)

    LOG.debug('Runtime logic ending...')
    client.close()
def run(self):
    LOG.info("Initialise policy thread for policy %s" % self.policy.name)
    self.wait_until_final_state()
    LOG.info("Starting policy thread for policy %s" % self.policy.name)
    if self.is_stopped:
        LOG.info("Cannot start policy threads. PolicyThreads are stopped.")
    elif self.topology.state in ['DEPLOYED', 'UPDATED']:
        self.start_policy_checker_si()
        LOG.info("Started policy thread for policy %s" % self.policy.name)
    else:
        LOG.error("Something went wrong: topology state is %s. Not starting the PolicyThread."
                  % self.topology.state)
def update(self, topology):
    if self.runtime_agent:
        self.runtime_agent.stop(topology.id)
    LOG.debug("Start Updating topology %s" % topology.name)
    _name = topology.ext_name
    _template = self.template_manager.get_template(topology)
    LOG.debug("Stack name: %s" % _name)
    LOG.debug("Template: %s" % _template)
    try:
        stack_details = self.heatclient.update(stack_id=topology.ext_id, template=_template)
        LOG.debug("stack details after update: %s" % stack_details)
        LOG.debug("stack id: %s" % topology.ext_id)
    except Exception, msg:
        LOG.error(msg)
        topology.state = 'ERROR'
        topology.ext_id = None
    return topology
def run(self):
    LOG.debug("Starting new thread")
    i = 0
    while i < 18:
        for service_instance in self.topology.service_instances:
            for unit in service_instance.units:
                if i == 0:
                    unit.state = 'Initialised'
                else:
                    unit.state = 'Started'
                dbm_name = SysUtil().get_sys_conf()['database_manager']
                db = FactoryAgent.get_agent(dbm_name)
                db.update(unit)
                # conf = SysUtil().get_sys_conf().props
                # runtime_agent = FactoryAgent().get_agent(conf['runtime_agent'])
                # runtime_agent.run(self.topology)
        if i > 1:
            return
        time.sleep(2)
        i += 1
    LOG.error("Can't get info on the units after 180 seconds, is there a problem?")
def deploy(self, attributes):
    """
    Deploy method
    """
    if self.stack_id is not None:
        pass
    parameters = {}
    # defining the location of the topology
    if 'maas.location' in attributes:
        self.location = parameters['location'] = os.environ['location'] = attributes['maas.location']
        LOG.debug("location %s passed via OCCI Attribute" % self.location)
    self.deployer = FactoryAgent().get_agent(self.conf['deployer'])
    self.topology_type = topology_mapping[self.location]
    LOG.info("deploying template %s" % self.topology_type)
    # read template...
    f = open(os.path.join(SO_DIR, 'data/topologies', self.topology_type))
    template = f.read()
    f.close()
    LOG.debug("content of the topology %s" % template)
    # extracting hot template
    try:
        config = yaml.load(template)
        LOG.debug(config)
    except yaml.YAMLError, exc:
        if hasattr(exc, 'problem_mark'):
            mark = exc.problem_mark
            LOG.error("Error in configuration file: %s" % exc)
            LOG.error("Error position: (%s:%s)" % (mark.line + 1, mark.column + 1))
        else:
            LOG.error("Error in configuration file: %s" % exc)
def dispose(self, topology):
    # checker_thread = self.checker_thread
    # LOG.debug("Get RuntimeAgent for topology %s" % topology.id)
    # runtime_agent = self.runtime_agents.get(topology.id)
    # LOG.debug("Got RuntimeAgent: %s" % self.runtime_agent)
    stack_details = None
    topology.state = 'DELETING'
    self.db.update(topology)
    if self.runtime_agent:
        self.runtime_agent.stop(topology.id)
    try:
        stack_details = self.heatclient.delete(topology.ext_id)
    except HTTPNotFound, exc:
        exc.message = 'Topology "%s" was not found on OpenStack anymore. (%s)' % (topology.name, exc.message)
        topology.state = 'DELETED'
        topology.detailed_state = exc.message
        self.db.update(topology)
        LOG.error(exc.message)
        raise exc
    except Exception, msg:
        LOG.error(msg)
        topology.state = 'ERROR'
def deploy(self, topology):
    LOG.debug("Start Deploying")
    name = topology.name
    template = self.template_manager.get_template(topology)
    LOG.debug("stack name: %s" % name)
    LOG.debug("template: %s" % template)
    try:
        # stack_id = stack_details['stack']['id']
        stack_details = StackDetails()
        stack_details.id = 'dummy-id'
        # filling the topology with real values
        # res = self.heatclient.list_resources(stack_id)
        for service_instance in topology.service_instances:
            for unit in service_instance.units:
                unit.ext_id = unit.id
        topology.ext_id = 1
        LOG.debug("stack id: dummy-id")
    except KeyError, exc:
        LOG.error(KeyError)
        LOG.error(exc)
        stack_id = "None"
def update(self, old, new, extras):
    if self.stack_id is not None:
        f = open(os.path.join(BUNDLE_DIR, 'data', 'update.yaml'))
        template = f.read()
        f.close()
        # XXX the attribute mcn.endpoint.mme-pgwc-sgwc must be present, otherwise fail
        try:
            mme = new.attributes['mme-pgwc-sgwc_public_ip']
        except KeyError:
            LOG.error('The update method to the service instance was called but a required parameter was not found.')
            LOG.error('mme-pgwc-sgwc_public_ip must be supplied as an X-OCCI-Attribute header.')
            LOG.error('Doing nothing...')
            raise RuntimeError('mme-pgwc-sgwc_public_ip must be supplied as an X-OCCI-Attribute header.')
        self.deployer.update(self.stack_id, template, self.token,
                             parameters={'mme_pgwc_sgwc_input': mme})
        LOG.info('Updated stack ID: ' + repr(self.stack_id))
def start_policy_checker_si(self):
    LOG.debug("Start active_policy check for policy %s on service instance %s"
              % (self.policy.name, self.service_instance.name))
    while not self.is_stopped:
        LOG.debug("Locking policy checking from %s" % self.policy.name)
        self.lock.acquire()
        LOG.debug("Locked policy checking from %s" % self.policy.name)
        action = self.policy.action
        if action.scaling_adjustment > 0:
            if (len(self.service_instance.units) + action.scaling_adjustment) > self.service_instance.size.get('max'):
                LOG.warning('Check upscaling - Maximum number of units exceeded for service instance: %s'
                            % self.service_instance.name)
                LOG.debug("Release Policy lock by %s" % self.policy.name)
                self.lock.release()
                time.sleep(self.policy.period)
                continue
        if action.scaling_adjustment < 0:
            if (len(self.service_instance.units) + action.scaling_adjustment) < self.service_instance.size.get('min'):
                LOG.warning('Check downscaling - Minimum number of units exceeded for service instance: %s'
                            % self.service_instance.name)
                LOG.debug("Release Policy lock by %s" % self.policy.name)
                self.lock.release()
                time.sleep(self.policy.period)
                continue
        if self.service_instance.state != 'UPDATING' and self.check_alarm_si():
            LOG.debug('Execute action: %s' % repr(self.policy.action))
            if action.adjustment_type == 'ChangeInCapacity':
                self.service_instance.state = 'UPDATING'
                self.topology.state = 'UPDATING'
                if action.scaling_adjustment > 0:
                    if (len(self.service_instance.units) + action.scaling_adjustment) <= self.service_instance.size.get('max'):
                        for i in range(action.scaling_adjustment):
                            _hostname = '%s-%s' % (self.service_instance.name,
                                                   str(len(self.service_instance.units) + 1))
                            _state = 'DEFINED'
                            new_unit = Unit(hostname=_hostname, state=_state)
                            new_unit.service_instance_id = self.service_instance.id
                            self.service_instance.units.append(new_unit)
                            self.db.persist(new_unit)
                    else:
                        LOG.warning('Maximum number of units exceeded for service instance: %s'
                                    % self.service_instance.name)
                else:
                    if (len(self.service_instance.units) + action.scaling_adjustment) >= self.service_instance.size.get('min'):
                        for i in range(-action.scaling_adjustment):
                            removed_unit = self.remove_unit(self.topology, self.service_instance)
                            self.db.remove(removed_unit)
                    else:
                        LOG.warning('Minimum number of units exceeded for service instance: %s'
                                    % self.service_instance.name)
                topology = self.db.update(self.topology)
                template = self.template_manager.get_template(self.topology)
                # LOG.debug("Send update to heat template with: \n%s" % template)
                try:
                    self.heat_client.update(stack_id=self.topology.ext_id, template=template)
                    self.wait_until_final_state()
                    if not self.topology.state == 'DEPLOYED':
                        LOG.error("Something went wrong: topology state is %s. Stopping the policy checker."
                                  % self.topology.state)
                        self.lock.release()
                        return
                except:
                    self.is_stopped = True
                    self.lock.release()
            LOG.info('Sleeping (cooldown) for %s seconds' % self.policy.action.cooldown)
            time.sleep(self.policy.action.cooldown)
        LOG.debug("Release Policy lock from %s" % self.policy.name)
        self.lock.release()
        LOG.info('Sleeping (evaluation period) for %s seconds' % self.policy.period)
        time.sleep(self.policy.period)
def active_policy_unit(self):
    LOG.debug("Start active_policy check")
    while not self.is_stopped:
        LOG.debug("Locking policy checking by %s" % self.policy.name)
        self.lock.acquire()
        for unit in self.service_instance.units:
            action = self.policy.action
            if action.scaling_adjustment > 0:
                if (len(self.service_instance.units) + action.scaling_adjustment) > self.service_instance.size.get('max'):
                    LOG.warning('Check upscaling - Maximum number of units exceeded for service instance: %s'
                                % self.service_instance.name)
                    break
            if action.scaling_adjustment < 0:
                if (len(self.service_instance.units) + action.scaling_adjustment) < self.service_instance.size.get('min'):
                    LOG.warning('Check downscaling - Minimum number of units exceeded for service instance: %s'
                                % self.service_instance.name)
                    break
            if self.service_instance.state != 'UPDATING' and self.check_alarm_unit(unit, self.monitor):
                LOG.debug('Execute action: %s' % repr(self.policy.action))
                if action.adjustment_type == 'ChangeInCapacity':
                    self.service_instance.state = 'UPDATING'
                    self.topology.state = 'UPDATING'
                    if action.scaling_adjustment > 0:
                        if (len(self.service_instance.units) + action.scaling_adjustment) <= self.service_instance.size.get('max'):
                            for i in range(action.scaling_adjustment):
                                _hostname = '%s-%s' % (self.service_instance.name,
                                                       str(len(self.service_instance.units) + 1))
                                _state = 'Initialised'
                                new_unit = Unit(hostname=_hostname, state=_state)
                                self.service_instance.units.append(new_unit)
                        else:
                            LOG.warning('Maximum number of units exceeded for service instance: %s'
                                        % self.service_instance.name)
                    else:
                        if (len(self.service_instance.units) + action.scaling_adjustment) >= self.service_instance.size.get('min'):
                            for i in range(-action.scaling_adjustment):
                                self.remove_unit(self.topology, self.service_instance)
                        else:
                            LOG.warning('Minimum number of units exceeded for service instance: %s'
                                        % self.service_instance.name)
                    try:
                        self.db.update(self.topology)
                    except Exception, msg:
                        LOG.error(msg)
                        self.topology.state = 'ERROR'
                        self.topology.ext_id = None
                    template = self.template_manager.get_template(self.topology)
                    # LOG.debug("Send update to heat template with: \n%s" % template)
                    self.heat_client.update(stack_id=self.topology.ext_id, template=template)
                LOG.info('Sleeping (cooldown) for %s seconds' % self.policy.action.cooldown)
                time.sleep(self.policy.action.cooldown)
        LOG.debug("Release Policy lock by %s" % self.policy.name)
        self.lock.release()
        LOG.info('Sleeping (evaluation period) for %s seconds' % self.policy.period)
        time.sleep(self.policy.period)
except yaml.YAMLError, exc:
    if hasattr(exc, 'problem_mark'):
        mark = exc.problem_mark
        LOG.error("Error in configuration file: %s" % exc)
        LOG.error("Error position: (%s:%s)" % (mark.line + 1, mark.column + 1))
    else:
        LOG.error("Error in configuration file: %s" % exc)

# creating the topology object
try:
    topology = TopologyOrchestrator.create(config)
except NotFoundException, msg:
    LOG.error(msg)
    return
except NotUniqueException, msg:
    LOG.error(msg)
    return
except NotDefinedException, msg:
    LOG.error(msg)
    return
except InvalidInputException, msg:
    LOG.error(msg)
    return
except TypeErrorException, msg:
    LOG.error(msg)
    return
except Exception, msg:
    LOG.error(msg)
    return

# deploying the topology
class SoExecution(service_orchestrator.Execution):
    """
    class docs
    """

    def __init__(self, token, tenant_name):
        """
        Constructor
        """
        super(SoExecution, self).__init__(token, tenant_name)
        # by default
        self.topology_type = "topology-maas-bern.json"
        self.token = token
        self.tenant_name = tenant_name
        self.stack_id = None
        self.maas = None
        self.location = 'bern'
        # make sure we can talk to deployer...
        LOG.debug("sending request to the url %s" % os.environ['DESIGN_URI'])
        self.conf = sys_util().get_sys_conf()
        LOG.debug("instantiating deployer %s" % self.conf['deployer'])
        self.deployer = None

    def deploy(self, attributes):
        """
        Deploy method
        """
        if self.stack_id is not None:
            pass
        parameters = {}
        # defining the location of the topology
        if 'maas.location' in attributes:
            self.location = parameters['location'] = os.environ['location'] = attributes['maas.location']
            LOG.debug("location %s passed via OCCI Attribute" % self.location)
        self.deployer = FactoryAgent().get_agent(self.conf['deployer'])
        self.topology_type = topology_mapping[self.location]
        LOG.info("deploying template %s" % self.topology_type)
        # read template...
        f = open(os.path.join(SO_DIR, 'data/topologies', self.topology_type))
        template = f.read()
        f.close()
        LOG.debug("content of the topology %s" % template)
        # extracting hot template
        try:
            config = yaml.load(template)
            LOG.debug(config)
        except yaml.YAMLError, exc:
            if hasattr(exc, 'problem_mark'):
                mark = exc.problem_mark
                LOG.error("Error in configuration file: %s" % exc)
                LOG.error("Error position: (%s:%s)" % (mark.line + 1, mark.column + 1))
            else:
                LOG.error("Error in configuration file: %s" % exc)
        # creating the topology object
        try:
            topology = TopologyOrchestrator.create(config)
        except NotFoundException, msg:
            LOG.error(msg)
            return
    LOG.exception(exc)
    topology.state = 'ERROR'
    topology.ext_id = None
    raise
else:
    LOG.debug("Restart topology %s" % topology.name)
    # check that the topology is still valid
    try:
        self.checker.check(topology=topology)
        print "finish check"
    except Exception, exc:
        exc.message = 'Topology "%s" is not valid anymore. (%s)' % (topology.name, exc.message)
        topology.state = 'ERROR'
        topology.detailed_state = exc.message
        self.db.update(topology)
        LOG.error(exc.message)
        raise exc
    # check that the topology already exists on OpenStack
    try:
        stack_details = self.heatclient.show(topology.ext_id)
    except HTTPNotFound, exc:
        exc.message = 'Topology "%s" was not found on OpenStack anymore. (%s)' % (topology.name, exc.message)
        topology.state = 'DELETED'
        topology.detailed_state = exc.message
        self.db.update(topology)
        LOG.error(exc.message)
        raise exc
    LOG.debug("Starting RuntimeAgent for topology %s." % topology.id)
    self.runtime_agent.start(topology)
    # self.register_agent.start()
return topology