def __init__(self, jira_id):
    dbobj = DBHandler()
    host = dbobj.getValue("jira_uri")
    user = dbobj.getValue("jira_user")
    password = dbobj.getValue("jira_password")
    self.jira_obj = JIRA(server=host, basic_auth=(user, password))
    self.jira_id = jira_id
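# Hedged usage sketch: assuming this __init__ belongs to the Jira wrapper class
# used elsewhere in this module (Jira(jira_id).comment(...) and
# Jira(jira_id).update_transition(...)), a typical call site looks like:
#
#   jira_obj = Jira("EDGE-1234")          # "EDGE-1234" is an illustrative key
#   jira_obj.comment("Deployment started")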
def action_zone(task_id, params, script_name=None):
    try:
        class_name = get_script_class_name(script_name)
        class_method = globals().get(class_name, None)
        logger.debug("Class Name Found - %s, Method - %s" % (class_name, class_method))
        if class_method is None:
            raise Exception("No Scripts - %s can be found to deploy for task_id - %s"
                            % (script_name, task_id))
        dbobj = DBHandler()
        action_info = dbobj.getActionInfoObj(task_id)
        action_info.update_status('INPROGRESS')
        logger.debug("TaskObj - Status - %s" % action_info.status.name)
        script_obj = class_method(action_info, params=params)
        action = action_info.action.name.lower()
        logger.debug("Exec %s on %s, task id - %s for zone - %s with params - %s"
                     % (action, action_info.project, action_info.id,
                        params.get('zone'), params))
        getattr(script_obj, action)()
        logger.info("Task[%s] for Project- %s for zone- %s is completed"
                    % (action_info.id, action_info.project, params.get('zone')))
        if ActionInfo.objects.get(id=task_id).config_dict().get('is_canary') is True:
            closure_task(task_id=task_id)
    except Exception as e:
        logger.exception(e)
        if ActionInfo.objects.get(id=task_id).config_dict().get('is_canary') is True:
            EdgeCache(task_id, params.get('zone')).set(task_status='FAILED')
            closure_error_task(task_id=task_id)
        raise Exception(e)
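# Hedged assumption: get_script_class_name() maps a script file name to the
# class name looked up in globals(); the exact convention is not shown in this
# module, so the mapping below is illustrative only:
#
#   class_name = get_script_class_name("deploy_scripts.py")  # e.g. "DeployScripts"
#   script_cls = globals().get(class_name)
#   script_cls(action_info, params=params).deploy()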
def create_staggered_batches(self, serverlist, otherserverlist=None, staggered_batch=None):
    # Avoid a mutable default argument.
    otherserverlist = otherserverlist if otherserverlist is not None else []
    oos_servers = []
    is_servers = []
    # serverlist may contain duplicate servers.
    for servers in serverlist:
        if servers.get('enabled_status', False) is True:
            is_servers.append(servers.get('server'))
        else:
            oos_servers.append(servers.get('server'))
    is_servers = list(set(is_servers))
    oos_servers = list(set(oos_servers) - set(is_servers))
    staggered_batch_config = DBHandler().getValue("staggered_batch_config")
    batch_size = None
    if staggered_batch is not None:
        if self.validate_staggered_batch(staggered_batch):
            batch_size = staggered_batch
    if batch_size is None:
        if len(is_servers) >= 10:
            batch_size = staggered_batch_config.get("10")
        elif len(is_servers) >= 8:
            batch_size = staggered_batch_config.get("8")
        elif len(is_servers) >= 4:
            batch_size = staggered_batch_config.get("4")
        elif len(is_servers) >= 2:
            batch_size = staggered_batch_config.get("2")
        elif len(is_servers) >= 1:
            batch_size = staggered_batch_config.get("1")
    batch_servers = {}
    if batch_size is not None:
        old_batch_count = 0
        for batch in batch_size.split(","):
            # Percentages appear to be cumulative: each slice runs from the
            # previous cut-off up to <batch>% of the in-service list.
            batch_number = int(batch.strip('%'))
            batch_count = int((len(is_servers) * batch_number) / 100)
            batch_servers[batch] = is_servers[old_batch_count:batch_count]
            old_batch_count = batch_count
    otherservers = [servers.get('server') for servers in otherserverlist]
    otherservers = list(set(otherservers) - set(is_servers).union(set(oos_servers)))
    otherservers.extend(oos_servers)
    if otherservers:
        batch_size = (staggered_batch_config.get("0").strip()
                      if batch_size is None
                      else batch_size + "," + staggered_batch_config.get("0").strip())
        batch_servers[batch_size.split(",")[-1]] = otherservers
    self.non_canary_staggered_label = [
        staggered_batch_config.get("0").strip(),
        staggered_batch_config.get("1").strip()
    ]
    self.staggered_batch = batch_size
    self.__current_staggered_label = batch_size.split(",")[0]
    self.staggered_servers = batch_servers
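# Hedged sketch of the assumed "staggered_batch_config" shape (not shown in
# this module): keys are minimum in-service server counts, values are
# comma-separated cumulative percentage labels, e.g.
#
#   staggered_batch_config = {
#       "10": "10%,30%,60%,100%",
#       "8":  "25%,50%,100%",
#       "4":  "25%,100%",
#       "2":  "50%,100%",
#       "1":  "100%",
#       "0":  "100%",
#   }
#
# Under that assumption, with 10 in-service servers and "10%,30%,60%,100%",
# the batches would hold servers [0:1], [1:3], [3:6] and [6:10] respectively.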
class ELKHandler():
    """This class will be used to read/write data to ELK."""

    def __init__(self, correlation=None):
        """ES initialization."""
        self.dbobj = DBHandler()
        self.host = self.dbobj.getValue("es_host")
        timeout = int(self.dbobj.getValue("es_timeout"))
        retries = int(self.dbobj.getValue("es_retries"))
        if not self.host or not timeout or not retries:
            raise AttributeError('Not able to create ES object')
        self.es = Elasticsearch(hosts=self.host, timeout=timeout, max_retries=retries)
        self.index = self.dbobj.getValue("es_index")
        self.detail_index = self.dbobj.getValue("es_detail_index")
        self.read_index = self.dbobj.getValue("es_read_index")
        self.read_detail_index = self.dbobj.getValue("es_read_detail_index")
        self.es_read_count = int(self.dbobj.getValue("es_read_count"))
        self.es_delete_template = self.dbobj.getValue("es_delete_template")
        self.correlation = correlation
        if not self.correlation:
            raise ValueError('Correlation key is mandatory for ES operations')

    def send(self, data, detailed=False):
        """Write operation to ELK."""
        try:
            localtime = datetime.datetime.now()
            index_name = self.index if not detailed else self.detail_index
            elk_data = []
            index_data = {"index": {"_index": index_name}}
            if isinstance(data, dict):
                data['timestamp'] = localtime
                data['worker_host'] = socket.gethostname()
                data['correlation'] = self.correlation
                elk_data.append(index_data)
                elk_data.append(data)
            elif isinstance(data, list):
                for record in data:
                    record['timestamp'] = localtime
                    record['worker_host'] = socket.gethostname()
                    record['correlation'] = self.correlation
                    elk_data.append(index_data)
                    elk_data.append(record)
            else:
                raise Exception("Unsupported type received for data writing - %s, data - %s"
                                % (type(data), data))
            logger.info("Data Writing in ELK - %s" % elk_data)
            if len(elk_data) > 0:
                self.es.bulk(index=index_name, doc_type='edge', body=elk_data)
                self.es.indices.refresh(index=index_name)
            else:
                logger.error("No data available for ELK writing ELK_DATA - %s, DATA - %s"
                             % (elk_data, data))
        except Exception as e:
            logger.exception(e)
            message = "Unable to connect to ES cluster - %s, Error - %s" % (self.host, str(e))
            raise Exception(message)
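# Hedged usage sketch: elsewhere in this module the correlation key is the
# Edge task id, and send() accepts either one document or a list of documents:
#
#   elk = ELKHandler(correlation=task_id)
#   elk.send({"event": "deploy_started", "zone": "east"})
#   elk.send([{"server": "10.1.0.5", "status": "OK"},
#             {"server": "10.1.0.6", "status": "OK"}], detailed=True)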
class CanaryImplementation():
    dbobj = DBHandler()

    def __init__(self):
        pass

    @classmethod
    def schedule_canary_task(cls, action_task_id, jira_id, project_name,
                             staggered_label, zone, staggered_servers,
                             non_staggered_servers):
        try:
            data = {
                'ccrid': action_task_id,
                'jiraid': jira_id,
                'project_name': project_name,
                'staggered_status': staggered_label,
                'datacenter': zone.lower(),
                'verf_server': ",".join(staggered_servers),
                'prod_server': ",".join(non_staggered_servers),
                'respond_to': 'edge',
            }
            logger.info("Parameters received to schedule CANARY task - %s" % data)
            canary_uri = cls.dbobj.getValue("canary_schedule_api")
            canary_success_code = [200, 119]
            canary_config_failure_code = [111]
            message = None
            status = None
            code = 200
            response = requests.post(canary_uri, data=json.dumps(data))
            if response.status_code == 200:
                response_data = response.json()
                logger.info("Response from Canary for ActionInfo Task Id - %s, zone - %s, "
                            "staggered_label - %s, Data - %s"
                            % (action_task_id, zone, staggered_label, response_data))
                if (response_data.get('status') is True
                        or response_data.get('code') in canary_success_code):
                    logger.info("Successfully called Canary for ActionInfo Task Id - %s, "
                                "zone - %s, staggered_label - %s"
                                % (action_task_id, zone, staggered_label))
                    status = True
                elif response_data.get('code') in canary_config_failure_code:
                    # Canary reported a configuration failure rather than a hard error.
                    status = False
                    message = response_data.get('message')
                    code = response_data.get('code')
                else:
                    error_message = response_data.get('message')
                    raise Exception(error_message)
            else:
                error_message = ("Unknown error occurred while calling canary API, "
                                 "Status - %s, Error - %s"
                                 % (response.status_code, response.text))
                logger.error(error_message)
                raise Exception(error_message)
        except Exception as e:
            status = False
            message = str(e)
            code = 499
            logger.exception(e)
        finally:
class ConsulOperation():
    def __init__(self):
        try:
            self.consul_client = None
            self.dbobj = DBHandler()
            self.consul_host = self.dbobj.getValue("consul_host")
            logger.debug(self.consul_host)
            if not self.consul_host:
                raise ValueError('consul host is mandatory')
            self.consul_client = consul.Consul(host=self.consul_host)
        except Exception as e:
            logger.error("exception connecting to consul server - %s" % (self.consul_host))
            raise e

    def setKV(self, key, value):
        try:
            self.consul_client.kv.put(key, value)
            return "successfully saved to consul"
        except Exception as e:
            logger.error("%s Got exception while setting KV to consul" % (key))
            logger.exception(e)
            return e

    def getKV(self, key):
        try:
            index, data = self.consul_client.kv.get(key)
            logger.info("%s successfully fetched key" % (key))
            return data['Value']
        except Exception as e:
            logger.error("failed to get key from consul")
            logger.exception(e)
            return e
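# Hedged usage sketch (python-consul client): kv.get() returns the stored
# value as bytes in data['Value'], so callers may need to decode it:
#
#   consul_ops = ConsulOperation()
#   consul_ops.setKV("projects/SPACE/myproject", "1.0.42")   # illustrative key
#   version = consul_ops.getKV("projects/SPACE/myproject")   # b'1.0.42'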
def check_queue_entries():
    dbobj = DBHandler()
    queued_tasks = ActionQueue.objects.all()
    logger.info("starting to check queue entries")
    for task in queued_tasks:
        if task.action.name == "restart":
            queue_retries = int(dbobj.getValue("restart_queue_retries"))
        elif task.action.name == "deploy":
            queue_retries = int(dbobj.getValue("deploy_queue_retries"))
        else:
            queue_retries = 10
        if task.retries < queue_retries:
            server_list = []
            total_servers = task.server_list()
            server_list.extend(total_servers)
            check_queue = DashUtils().check_active_servers(server_list, task.space,
                                                           task.project.name,
                                                           task.action.name)
            if len(check_queue) > 0:
                task.retries += 1
                logger.debug("updating retries %d for project %s from action queue"
                             % (task.retries, task.project.name))
                task.save()
            else:
                host_uri = dbobj.getValue("edge_host")
                url = (host_uri + "/space/" + task.space.name + "/" +
                       task.project.name + "/" + task.action.name + "/")
                payload = {'content': {'servers': server_list,
                                       'user': task.user.username}}
                start_task = requests.post(url, data=json.dumps(payload),
                                           headers={'Content-Type': 'application/json'})
                action_id = start_task.json().get('action_id', None)
                logger.debug("started task %s for queue entry - %s, %s -- %s"
                             % (action_id, task.project.name, task.user.username,
                                dbobj.getValue("st2_user")))
                if task.user.username == dbobj.getValue("st2_user"):
                    logger.info("sending response to st2 for queue id %s and task id %s"
                                % (task.id, action_id))
                    payload = {'queue_id': task.id, 'action_id': action_id, 'status': True}
                    send_st2 = send_response_to_st2(dbobj, payload)
                    logger.info("Response received from st2 - %s" % (send_st2.text))
                task.delete()
        else:
            # Jira update
            jira_id = Project.objects.get(name=task.project).config_dict().get('jira_id')
            host_uri = dbobj.getValue("edge_host")
            jira_msg = "Unable to perform action. Maximum retry count has been exceeded."
            jira_obj = Jira(jira_id)
            jira_obj.comment(str(jira_msg))
            # st2 update
            if task.user.username == dbobj.getValue("st2_user"):
                logger.info("sending response queue_failed to st2 for queue id %s" % (task.id))
                payload = {'queue_id': task.id, 'status': False, 'error': jira_msg}
                send_st2 = send_response_to_st2(dbobj, payload)
                logger.info("Response received from st2 - %s" % (send_st2.text))
            task.delete()
    return True
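# The POST body above matches what the plan view's post() handler (further
# below) parses out of the request content for unauthenticated API callers:
#
#   {
#       "content": {
#           "servers": ["10.1.0.5", "10.1.0.6"],
#           "user": "st2_service_user"
#       }
#   }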
def separate_servers_by_zone(serverlist):
    servers = {}
    zone_prefix = DBHandler().getValue("zone_ip_prefix")
    for zone in zone_prefix:
        if zone not in servers:
            servers[zone] = []
        pattern = re.compile(zone_prefix[zone])
        for server in serverlist:
            if pattern.match(server):
                servers[zone].append(server)
    return servers
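# Hedged sketch of the assumed "zone_ip_prefix" shape: zone names mapped to
# regexes matched at the start of the server address, e.g.
#
#   zone_ip_prefix = {"east": r"10\.1\.", "west": r"10\.2\."}
#   separate_servers_by_zone(["10.1.0.5", "10.2.0.7", "10.1.0.9"])
#   # -> {"east": ["10.1.0.5", "10.1.0.9"], "west": ["10.2.0.7"]}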
def __init__(self, task_obj=None, params=None, lbobj=None):
    # Avoid a mutable default argument.
    params = params if params is not None else {}
    BaseOrchestration.__init__(self, task_obj=task_obj, params=params)
    StaggeredImplementation.__init__(self)
    self.lbobj = LB(task_id=task_obj.id) if lbobj is None else lbobj
    self.cacheobj = EdgeCache(self.task_obj.id, self.params.get('zone'))
    self.dbobj = DBHandler()
    self.batch_servers_bypool = {}
    # The params below may be updated from ELK on object initialization; this
    # is needed for the Canary implementation (to be discussed later).
    # deployed_server_list also contains the failed servers.
    self.deployed_server_list = []
    self.is_canary = False
    self.all_servers_with_status = []
    self.pool_servers_with_status = []
    self.is_hard = False
def __init__(self, task_obj=None, params=None, lbobj=None):
    # Avoid a mutable default argument.
    params = params if params is not None else {}
    BaseOrchestration.__init__(self, task_obj=task_obj, params=params)
    self.lbobj = LB(task_id=task_obj.id) if lbobj is None else lbobj
    self.dbobj = DBHandler()
    self.failed_server_list = []
    self.deployed_server_list = []
    self.profile = self.config.get('profile')
    self.action = None
    self.aws_lb_info = self.config.get('aws_lb_info')
    self.all_servers_with_status = []
    self.all_instances = []
    self.already_oos_servers = []
    self.all_servers_with_instances = []
    self.success_healthcheck = []
def get_task_status(task_id):
    status = None
    data = EdgeCache(task_id).get()
    if data is None or type(data) is not dict or len(data) == 0:
        return status
    # Lower index means higher priority; None is appended as the lowest-priority sentinel.
    TASK_STATUS_PRIORITY = DBHandler().getActionStatus()
    TASK_STATUS_PRIORITY.append(None)
    for zone in data.get('zone', []):
        rdata = EdgeCache(task_id, zone).get()
        if not rdata:
            # Skip zones with no cached data.
            continue
        if (rdata.get('task_status') in TASK_STATUS_PRIORITY
                and TASK_STATUS_PRIORITY.index(rdata.get('task_status'))
                < TASK_STATUS_PRIORITY.index(status)):
            status = rdata.get('task_status')
    return status
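# Hedged example of the priority rule, assuming getActionStatus() returns the
# statuses ordered most- to least-severe (e.g. ['FAILED', 'INPROGRESS',
# 'COMPLETED']): if zone A reports 'COMPLETED' and zone B reports 'FAILED',
# the overall task status resolves to 'FAILED'.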
def killing_task(task_id, reason=None, user=None, kill_action='MANUAL_FAILED'):
    dbobj = DBHandler()
    task_obj = dbobj.getActionInfoObj(task_id)
    killed_status = False
    celery_stop_task_status = ['REVOKED', 'SUCCESS', 'FAILED']
    # if killed_status is True:
    dbobj.updateActionInfoEndTime(task_id)
    dbobj.setActionInfoState(task_id, kill_action.upper(), reason)
    username = user.username if user else "anonymous_user"
    killing_logs = "task is %s by %s, reason %s, killed_status %s" % (
        kill_action.upper(), username, reason, killed_status)
    ELKHandler(task_id).write_logs(exception=killing_logs)
    try:
        for task in task_obj.task_ids.split(","):
            retry_count = 3
            task_killed_status = False
            if task is not None and str(task).strip() != "":
                while retry_count > 0 and task_killed_status is False:
                    logger.info("Revoking Task Id - %s for Action Task Id - %s, Trying - %s"
                                % (task, task_id, retry_count))
                    app.control.revoke(task, terminate=True)
                    time.sleep(1)
                    response = AsyncResult(task)
                    if response.status.upper() in celery_stop_task_status:
                        task_killed_status = True
                        logger.info("Task ID - %s has been revoked with status - %s and state - %s"
                                    % (task, response.status, response.state))
                    retry_count -= 1
                    if response.status.upper() == 'REVOKED':
                        killed_status = True
                if task_killed_status is False:
                    exception = ("Unable to Revoke on Action Info Id - %s celery task id - %s "
                                 "with state %s" % (task_id, task, response.state))
                    ELKHandler(task_id).write_logs(exception=exception)
    except Exception as e:
        logger.error(e)
def closure_error_task(*args, **kwargs):
    logger.debug(args)
    logger.debug(kwargs)
    task_id = kwargs.get('task_id', None)
    logger.info("Executing Closure Error Task for id - %s " % task_id)
    status = get_task_status(task_id)
    if status is None:
        status = 'FAILED'
    try:
        action_obj = get_action_object(task_id)
        configs = action_obj.config_dict()
        jira_id = configs.get('jira_id', None)
        completion_status = check_completion_server_wise(action_obj)
        if jira_id is not None and action_obj.action.name.lower() == "deploy":
            jira_obj = Jira(jira_id)
            jira_obj.update_transition("Failed")
        if configs.get('notify_me', False):
            space_name = action_obj.project.space.name
            recipients = action_obj.project.space.dls()
            cc = []
            host_uri = Configs().getValue("edge_host")
            uri = "%s%s" % (host_uri, reverse('action', args=(space_name, task_id)))
            message = "Error for Edge Task Id - %s, check status : - %s" % (task_id, uri)
            subject = 'Edge notify Error for %s [#%s]' % (action_obj.project.name, task_id)
            edge_email = EmailMessage(subject, message, settings.EMAIL_FROM,
                                      recipients, cc=cc)
            edge_email.content_subtype = "html"
            edge_email.send(fail_silently=False)
    except Exception as e:
        logger.exception(e)
    dbobj = DBHandler()
    dbobj.setActionInfoState(task_id, status)
    dbobj.updateActionInfoEndTime(task_id)
    logger.info("Executed Closure Error Task for id - %s " % task_id)
def get_action_object(task_id):
    dbobj = DBHandler()
    return dbobj.getActionInfoObj(task_id)
def post(self, request, space_name=None, project_name=None, action=None):
    self.d['error'] = None
    self.resp_status = 200
    try:
        if space_name and project_name and action:
            self.d['space'] = Space.objects.get(name=space_name)
            self.d['project'] = Project.objects.get(name=project_name)
            self.d['action'] = EPlan.objects.get(name=action)
            self.d['servers'] = request.POST.getlist('servers', [])
            self.d['countdown'] = request.POST.get('countdown', 10)
            self.api_resp['project'] = project_name
            reason = request.POST.get('reason', None)
            if not request.user.is_authenticated():
                api_data = JSONParser().parse(request).get('content')
                logger.info("data received %s" % api_data)
            user = request.user if request.user.is_authenticated() else None
            if user is None:
                try:
                    user = User.objects.get(username=api_data.get('user'))
                except User.DoesNotExist as e:
                    user = LDAPBackend().populate_user(api_data.get('user'))
                except Exception as e:
                    logger.exception(e)
            logger.debug("%s------%s" % (request.user.is_authenticated(), user))
            if not request.user.is_authenticated():
                self.d['servers'] = api_data.get('servers', None)
                self.d['countdown'] = api_data.get('countdown', 10)
            if not self.d['servers'] or len(self.d['servers']) == 0:
                logger.info('no servers found, calc servers')
                server_info = DashUtils.get_server_object(self.d['project'])
                servers = []
                for info in server_info:
                    servers.extend(info.get('servers', []))
                self.d['servers'] = list(set(servers))
            elif (self.d['servers'] is None or self.d['servers'] == ""
                    or "" in self.d['servers']):
                raise Exception('No servers selected in Jira')
            retries = 0
            dbobj = DBHandler()
            self.d['servers'] = list(set(self.d['servers']))
            try:
                project_config = self.d['project'].config_dict()
                # The user is expected to send the description as a string via the API.
                if reason is None:
                    reason = project_config.get("description", "Unknown Reason")
                if type(reason) is not dict:
                    description = {}
                    for s in self.d['servers']:
                        description[s] = reason
                    # Add/update the description in the config and persist the new config.
                    project_config['description'] = description
                    new_config = json.dumps(project_config, indent=4)
                    project = Project.objects.get(name=self.d['project'])
                    project.config = new_config
                    project.save()
                    logger.info("Successfully updated description for project - {project}"
                                .format(project=self.d['project']))
                else:
                    # If it is already a dict, assume it is correct.
                    logger.info("Not updating description for project - {project}"
                                .format(project=self.d['project']))
            except Exception as e:
                logger.info("Exception while updating description - {error} for project {project}"
                            .format(error=e, project=self.d['project']))
            active_servers = DashUtils().check_active_servers(
                self.d['servers'], self.d['space'].name,
                self.d['project'].name, self.d['action'].name)
            self.d['project'] = Project.objects.get(name=project_name)
            if len(self.d['servers']) > 0 and len(active_servers) > 0:
                existing_queue_servers = DashUtils().check_existing_queue_servers(
                    self.d['project'], self.d['space'], self.d['action'],
                    self.d['servers'])
                if len(existing_queue_servers) > 0:
                    raise Exception('Unable to perform action !!! There is already '
                                    'a task with similar action in queue.')
                else:
                    # active_actions = []
                    conf = self.d['project'].total_config_dict()
                    jira_id = conf.get('jira_id')
                    conf['countdown'] = int(self.d['countdown'])
                    conf = json.dumps(conf)
                    get_queue = DashUtils().add_entry_to_action_queue(
                        space=self.d['space'], project=self.d['project'],
                        servers=self.d['servers'], action=self.d['action'],
                        user=user, config=conf)
                    active_tasks = DashUtils().get_active_tasks(self.d['project'])
                    # active_actions = [active_actions + str(i) for i in active_tasks]
                    self.api_resp['queue_id'] = get_queue.id
                    try:
                        if jira_id:
                            host_uri = dbobj.getValue("edge_host")
                            # url = "%s%s" % (host_uri, reverse('action', args=(self.d['space'].name, active_tasks[0])))
                            jira_msg = "Queued this task"
                            jira_obj = Jira(jira_id)
                            jira_obj.comment(str(jira_msg))
                    except Exception as e:
                        logger.exception(e)
                    raise AssertionError('Queued this task . Currently active task id - %s'
                                         % (active_tasks))
            if len(self.d['servers']) > 0:
                # Add an entry to ActionInfo and redirect to the action page.
                self.d['servers'] = json.dumps(self.d['servers'])
                conf = self.d['project'].total_config_dict()
                if (self.d['action'].name in ["rollback", "hard_rollback"]
                        and conf.get('rollback_version', None) is not None):
                    conf['version'] = conf.get('rollback_version', None)
                elif self.d['action'].name in ["rollback", "hard_rollback"]:
                    raise ValueError("rollback version is mandatory for action %s"
                                     % (self.d['action'].name))
                conf['countdown'] = int(self.d['countdown'])
                conf = json.dumps(conf)
                status = ActionStatus.objects.get(name='BUILDING')
                script_file_name = self.d['project'].env.script_file_name
                new_action = ActionInfo(project=self.d['project'],
                                        action=self.d['action'],
                                        servers=self.d['servers'],
                                        config=conf, user=user, status=status,
                                        script_file_name=script_file_name)
                new_action.save()
                logger.info("added a new action %s" % new_action.id)
                self.d['action_id'] = new_action.id
                self.api_resp['action_id'] = new_action.id
                # Call the backend task and store its celery task id.
                task = orchestration_action.apply_async(
                    (new_action.id, ), countdown=int(self.d['countdown']))
                new_action.task_ids = task.id
                new_action.save()
                if request.user.is_authenticated():
                    return HttpResponseRedirect(
                        reverse('action', args=(space_name, new_action.id, )))
        else:
            raise ValueError('Insufficient data')
    except AssertionError as ae:
        logger.exception(ae)
        self.d['error'] = ae
        self.api_resp['error'] = str(ae)
        self.resp_status = 200
    except Exception as e:
        logger.exception(e)
        self.d['error'] = e
        self.api_resp['error'] = str(e)
        self.resp_status = 500
    if not request.user.is_authenticated():
        return JsonResponse(self.api_resp, status=self.resp_status)
    else:
        return render(request, 'plan.html', self.d)
            error_message = ("Canary [%s] status is %s for staggered - %s, "
                             "so not scheduling any further task"
                             % (zone, task_status, current_staggered_label))
            raise Exception(error_message)
        else:
            script_name = action_info_obj.script_file_name
            staggered_batch = data.get('staggered_batch', "").split(",")
            current_staggered_label_index = staggered_batch.index(current_staggered_label)
            next_staggered_label = staggered_batch[current_staggered_label_index + 1]
            edge_cache_obj.set(current_staggered_label=next_staggered_label)
            params = data.get('params', None)
            celery_task_id = zone_action.delay(action_info_obj.id, params, script_name)
            DBHandler().updateActionInfoTaskIds(task_id, celery_task_id)
            message = ("Canary [%s] has status - %s for staggered label - %s, "
                       "so scheduled task for label - %s"
                       % (zone, task_status, current_staggered_label, next_staggered_label))
            ELKHandler(task_id).write_logs(exception=message, level='info')
    except Exception as e:
        status = False
        code = 500
        logger.exception(e)
        error_message = "%s - %s" % ("Unable to schedule Action Info Task after canary response",
                                     str(e))
        logger.error(error_message)
        EdgeCache(task_id, zone).set(task_status='FAILED')
        # DBHandler().setActionInfoState(task_id, 'FAILED')
        # Jira(action_info_obj.config_dict().get('jira_id')).update_transition("FAILED")
        closure_error_task(task_id=task_id)
def closure_task(*args, **kwargs):
    logger.info(args)
    logger.info(kwargs)
    task_id = kwargs.get('task_id', None)
    logger.info("Executing Closure Task for id - %s " % task_id)
    status = get_task_status(task_id)
    dbobj = DBHandler()
    if status is None:
        try:
            action_obj = get_action_object(task_id)
            configs = action_obj.config_dict()
            if configs.get('zone_parallelazation', True) is False:
                dbobj.setActionInfoState(task_id, 'BUILDING')
                action.delay(task_id)
        except Exception as e:
            raise Exception(e)
    try:
        # Check whether all servers are deployed/restarted for this action.
        action_obj = get_action_object(task_id)
        configs = action_obj.config_dict()
        completion_status = check_completion_server_wise(action_obj)
        if completion_status is True:
            status = 'COMPLETED'
            logger.info("task is completed over all the servers. updating over jira, consul")
            dbobj.setActionInfoState(task_id, status)
            # Update Grafana with a deployment annotation.
            if configs.get('dashboard_name', None) is not None:
                if action_obj.action.name.lower() in ["deploy", "hard_deploy",
                                                      "rollback", "hard_rollback"]:
                    try:
                        task_end_time = localtime(action_obj.task_end_time)
                        dashboard = configs.get('dashboard_name', None)
                        version = configs.get('version', None)
                        grafanaobj = Grafana(dashboard)
                        response = grafanaobj.send_annotations(
                            task_id, task_end_time, version,
                            action_obj.project.space.name,
                            action_obj.action.name.lower(),
                            action_obj.project.name)
                        logger.info("Updated to dashboard - {db}, taskid - {tid}, response - {resp}"
                                    .format(db=dashboard, tid=task_id, resp=response))
                    except Exception as e:
                        logger.info("Error sending to grafana . Edge task id - {task}. Error - {err}"
                                    .format(task=task_id, err=e))
            jira_id = configs.get('jira_id', None)
            if jira_id is not None and action_obj.action.name.lower() == "deploy":
                jira_obj = Jira(jira_id)
                jira_obj.update_transition("Completed")
            # Update consul for scaling purposes.
            consul_client = ConsulOperation()
            key = ("projects/" + action_obj.project.space.name.upper() + "/" +
                   action_obj.project.name)
            version = configs.get('version', None)
            set_output = consul_client.setKV(key, version)
            logger.debug('consul setKV resp %s' % set_output)
        else:
            status = 'FAILED'
            dbobj.setActionInfoState(task_id, status)
    except Exception as e:
        logger.exception(e)
def action(task_id):
    try:
        dbobj = DBHandler()
        action_info = ActionInfo.objects.get(id=task_id)
        if not action_info.status.name == "BUILDING":
            message = ("task [%s] is either completed or not scheduled, status - %s. "
                       "Can't start it." % (task_id, action_info.status.name))
            raise Exception(message)
        action_info.update_status('SCHEDULED')
        action_info.update_start_time()
        logger.info("ActionInfo[%s], status - %s" % (action_info.id, action_info.status.name))
        configs = action_info.config_dict()
        # Key name under which the server details are available.
        server_info = configs.get('server_info')
        server_info_dict = configs.get(server_info, {})
        # Actual servers on which the action needs to be taken.
        servers = separate_servers_by_zone(action_info.server_list())
        script_name = action_info.script_file_name
        zone_list = [{
            'zone': zone,
            server_info: server_info_dict.get(zone),
            'servers': servers.get(zone)
        } for zone in servers if servers.get(zone)]
        logger.debug("server_info_dict - %s zone_list[%s]- %s"
                     % (server_info_dict, len(zone_list), zone_list))
        zone_parallelazation = configs.get('zone_parallelazation', True)
        if zone_parallelazation is True:
            if len(zone_list) > 1:
                res = chord([
                    action_zone.s(action_info.id, serverdetail, script_name)
                    for serverdetail in zone_list
                ])(closure_task.s(task_id=task_id).on_error(
                    closure_error_task.s(task_id=task_id)))
                for taskids in res.parent.children:
                    dbobj.updateActionInfoTaskIds(action_info, taskids.task_id)
            elif len(zone_list) == 1:
                res = action_zone.apply_async(
                    (action_info.id, zone_list[0], script_name),
                    link=closure_task.s(task_id=task_id),
                    link_error=closure_error_task.s(task_id=task_id))
                dbobj.updateActionInfoTaskIds(action_info, res.task_id)
            else:
                raise Exception("Unable to capture zone info from task- %s" % task_id)
        else:
            zone_serialization_order = configs.get('zone_serialization_order', [])
            edge_cache = EdgeCache(task_id)
            current_serialize_zone = edge_cache.get().get('current_serialize_zone', None)
            next_serialize_zone = edge_cache.get().get('next_serialize_zone', None)
            if current_serialize_zone is None and next_serialize_zone is None:
                # This is the first zone deployment.
                current_serialize_zone = (zone_serialization_order[0]
                                          if len(zone_serialization_order) > 0 else None)
                next_serialize_zone = (zone_serialization_order[1]
                                       if len(zone_serialization_order) > 1 else None)
                edge_cache.set(next_serialize_zone=next_serialize_zone,
                               current_serialize_zone=current_serialize_zone)
            elif next_serialize_zone is not None:
                current_serialize_zone = next_serialize_zone
                next_serialize_zone_index = (
                    zone_serialization_order.index(current_serialize_zone) + 1)
                # Store the zone name at the next index (not the index itself).
                next_serialize_zone = (
                    zone_serialization_order[next_serialize_zone_index]
                    if len(zone_serialization_order) > next_serialize_zone_index
                    else None)
                edge_cache.set(next_serialize_zone=next_serialize_zone,
                               current_serialize_zone=current_serialize_zone)
            else:
                logger.info("deployment for all zone - %s has been completed."
                            % zone_serialization_order)
                action_info.update_status('COMPLETED')
                return
            if current_serialize_zone is not None:
                zone_configuration = [
                    zone_detail for zone_detail in zone_list
                    if zone_detail.get('zone') == current_serialize_zone
                ]
                if len(zone_configuration) > 0:
                    res = action_zone.apply_async(
                        (action_info.id, zone_configuration[0], script_name),
                        link=closure_task.s(task_id=task_id),
                        link_error=closure_error_task.s(task_id=task_id))
                    dbobj.updateActionInfoTaskIds(action_info, res.id)
                    ELKHandler(task_id).write_logs(
                        exception="Scheduling task for zone - %s" % current_serialize_zone,
                        level='info')
                else:
                    message = ("No Zone Information Configured for zone - %s"
                               % current_serialize_zone)
                    ELKHandler(task_id).write_logs(exception=message, level="warn")
                    closure_task(task_id=task_id)
                    # Not raising an error, as there might be no servers selected
                    # for this zone.
                    # raise Exception(message)
        edge_cache = EdgeCache(task_id)
        zone = [zone.get('zone') for zone in zone_list]
        edge_cache.set(zone=zone)
        try:
            # Update status in Jira.
            email_dict = {}
            jira_id = configs.get('jira_id', None)
            space_name = dbobj.getSpaceName(action_info.project.name)
            host_uri = dbobj.getValue("edge_host")
            uri = "%s%s" % (host_uri, reverse('action', args=(space_name, task_id)))
            message = ("Started - %s at Edge Task Id - %s, check status : - %s"
                       % (action_info.action.name, task_id, uri))
            if jira_id is not None and action_info.action.name.lower() == "deploy":
                jira_obj = Jira(jira_id)
                jira_obj.update_transition("InProgress")
                jira_obj.comment(message)
            if configs.get('notify_me', False):
                if configs.get('notify_action', 'all') in [action_info.action.name, 'all']:
                    reason = configs.get('description', None)
                    email_dict['reason'] = reason
                    email_dict['action'] = action_info.action.name
                    email_dict['task_id'] = task_id
                    email_dict['uri'] = uri
                    email_dict['user'] = action_info.user.first_name
                    recipients = dbobj.getSpaceDLs(action_info.project.name)
                    cc = []
                    subject = ('Edge notify for Project - %s | Action -%s | Task Id - [#%s]'
                               % (action_info.project.name, action_info.action.name, task_id))
                    # render_to_string (not render) so the template output can be
                    # passed directly as the email body.
                    result = render_to_string('report/templates/email_msg.html', email_dict)
                    email_from = 'edge_' + action_info.action.name + '@makemytrip.com'
                    edge_email = EmailMessage(subject, result, email_from,
                                              recipients, cc=cc)
                    edge_email.content_subtype = "html"
                    edge_email.send(fail_silently=False)
        except Exception as e:
            logger.error(e)
    except Exception as e:
        logger.exception(e)
        dbobj.setActionInfoState(task_id, 'FAILED')
        dbobj.updateActionInfoEndTime(task_id)
        # Send the error to ELK.
        ELKHandler(task_id).write_logs(exception=str(e))
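# Hedged example of the serialized-zone flow, assuming
# zone_serialization_order = ["east", "west"]: the first call to action()
# schedules "east" and caches next_serialize_zone="west"; the closure/canary
# path re-enters action(), which schedules "west" and caches
# next_serialize_zone=None; the third entry logs completion and marks the
# ActionInfo COMPLETED.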