Beispiel #1
0
    def get(self, request, space_name=None, id=None, cmd=None):
        """Return the JSON status of an action, optionally revoking it first.

        Args:
            request: incoming HTTP request; ``request.user`` is passed to
                ``killing_task`` when a revoke is requested.
            space_name: not used by this method.
            id: primary key of the ``ActionInfo`` to report on.
            cmd: optional command; ``"revoke"`` (case-insensitive) calls
                ``killing_task`` before the status is built.

        Returns:
            JsonResponse: the serialized action plus a ``display`` list
            (HTTP 200); a ``FAILED`` payload with HTTP 404 when the action
            cannot be loaded or serialized; HTTP 400 when ``id`` is missing.
        """
        if not id:
            # The original fell through and returned None here, which is not
            # a valid view result. Answer explicitly instead.
            return JsonResponse(
                {'status': "FAILED", 'error': "Action id is required"},
                status=400,
                safe=True)

        # ``cmd`` defaults to None, so it must be checked before ``.lower()``.
        if cmd and cmd.lower() == "revoke":
            killing_task(id, user=request.user, kill_action="MANUAL_FAILED")

        resp = {'display': []}

        es = ELKHandler(id)
        basic_entries = es.read_action_entries(detailed=False) or []
        for entry in basic_entries:
            # ``.get`` so an entry without an 'exception' key is skipped
            # rather than raising KeyError.
            if (entry.get('category') == 'display'
                    and entry.get('exception') is not None):
                resp['display'].append({
                    entry.get('level', 'error').upper():
                    entry.get('exception')
                })

        try:
            action_info = ActionInfo.objects.get(id=id)
            resp.update(ActionSerializer(action_info, many=False).data)
            status = 200
        except Exception as e:
            resp['status'] = "FAILED"
            resp['error'] = str(e)
            status = 404

        # Returned after the try block (not from ``finally``) so that
        # non-``Exception`` errors are no longer silently swallowed.
        return JsonResponse(resp, status=status, safe=True)
Beispiel #2
0
def check_completion_server_wise(action):
    """Check whether every server of ``action`` has reported the target version.

    Reads the action's entries through ``ELKHandler`` and, for each entry
    with category ``"report"`` whose version equals the version in
    ``action.config_dict()``, marks that server as done and keeps the
    matching ``Dendrogram`` row in sync (created if missing; not updated
    for ``restart`` / ``hard_restart`` actions).

    Args:
        action: object exposing ``id``, ``project``, ``user``,
            ``action.name``, ``server_list()`` and ``config_dict()``.

    Returns:
        bool: True only when every server in ``action.server_list()`` has a
        non-None reported version. False when any server is missing or when
        the check itself failed (the original returned True on failure).
    """
    status = True
    try:
        es_data = ELKHandler(action.id).read_action_entries()
        # Loop-invariant values, computed once instead of per entry.
        expected_version = action.config_dict().get("version")
        is_restart = action.action.name in ("restart", "hard_restart")

        es_server_info = {}
        for server in action.server_list():
            es_server_info[server] = {
                "version": None,
                "project": action.project.name
            }

        for entry in es_data:
            if entry.get("category") != "report":
                continue
            if entry.get("version") != expected_version:
                continue

            server = entry.get("server")
            if server not in es_server_info:
                # Previously this raised KeyError and aborted the whole
                # check; an entry for a server outside this action is
                # simply ignored now.
                logger.warning(
                    "Ignoring report entry for unknown server %s on action %s"
                    % (server, action.id))
                continue

            es_server_info[server]["version"] = entry.get("version")
            try:
                dendrogram_info = Dendrogram.objects.get(
                    server=server, project=action.project)
                if not is_restart:
                    dendrogram_info.version = entry.get("version")
                    dendrogram_info.save()
                    logger.info("Updating dendrogram data for %s - %s" %
                                (action.project.name, server))
                else:
                    logger.info(
                        "Not updating dendrogram data for %s - %s for action - %s"
                        % (action.project.name, server, action.action.name))
            except Dendrogram.DoesNotExist:
                version_name = "Unknown" if is_restart else entry.get(
                    'version')
                Dendrogram(project=action.project,
                           user=action.user,
                           server=server,
                           version=version_name).save()

        status = all(
            info.get("version") is not None
            for info in es_server_info.values())
    except Exception as e:
        logger.exception(e)
        # A check that could not be carried out must not report completion.
        status = False

    logger.info(" for action %s check_completion_server_wise status is %s" %
                (action.id, status))
    return status
Beispiel #3
0
    def get(self, request, space_name=None, id=None):
        """Render ``action.html`` for a space, or for one action of that space.

        With ``id`` the context holds the matching ``ActionInfo`` queryset,
        its basic (and detailed) ELK entries, the ``display`` messages
        extracted from the basic entries and the task details supplied by
        the action's script class. Without ``id`` it holds every action of
        the space's projects, newest first.

        Args:
            request: incoming HTTP request.
            space_name: name of the ``Space`` to load.
            id: optional primary key of a single ``ActionInfo``.

        Returns:
            The rendered ``action.html`` response. Any exception raised
            while building the context is logged and exposed to the
            template as ``error``.
        """
        self.d = {}
        try:
            self.d['space'] = Space.objects.get(name=space_name)
            self.d['project'] = Project.objects.filter(space=self.d['space'])
            if id:
                self.d['action_info_list'] = ActionInfo.objects.filter(id=id)

                es = ELKHandler(id)
                self.d['basic_entries'] = es.read_action_entries(
                    detailed=False)

                # NOTE(review): ``request.user`` is presumably always truthy
                # under Django's auth middleware - confirm whether an
                # authentication check was intended here.
                if request.user:
                    self.d['detailed_entries'] = es.read_action_entries(
                        detailed=True)

                # ``.get('exception')`` so an entry lacking that key is
                # skipped instead of raising KeyError and blanking the page.
                self.d['display'] = [{
                    entry.get('level', 'error').upper():
                    entry.get('exception')
                } for entry in (self.d['basic_entries'] or [])
                    if entry.get('category') == 'display'
                    and entry.get('exception') is not None]

                self.d['action_info_id'] = id

                # Index the result once instead of three separate times.
                task = self.d['action_info_list'][0]
                class_name = get_script_class_name(task.script_file_name)
                class_object = globals().get(class_name, None)
                if class_object is None:
                    # Replaces the opaque "'NoneType' object is not
                    # callable" the template would otherwise be shown.
                    raise Exception(
                        "Script class %s not found for action %s" %
                        (class_name, id))
                script_obj = class_object(task_obj=task)

                self.d['task_details'] = script_obj.get_task_details(
                    task.action.name)

            else:
                self.d['action_info_list'] = ActionInfo.objects.filter(
                    project__in=self.d['project']).order_by('-id')
        except Exception as e:
            self.d['error'] = e
            logger.exception(e)

        return render(request, 'action.html', self.d)
Beispiel #4
0
def killing_task(task_id, reason=None, user=None, kill_action='MANUAL_FAILED'):
    """Mark an action as killed and revoke each of its Celery tasks.

    The action's end time and state are updated first and a log line is
    written through ``ELKHandler``. Each id in the action's comma-separated
    ``task_ids`` is then revoked with up to three attempts, one second
    apart. A task that never reaches a stop state gets its own
    ``ELKHandler`` log line.

    Args:
        task_id: primary key of the action whose tasks are revoked.
        reason: optional free text stored with the new state and logged.
        user: optional user object; its ``username`` is logged,
            ``"anonymous_user"`` when omitted.
        kill_action: state name to set (upper-cased before use).

    Returns:
        None. Errors raised while revoking are logged, not propagated.
    """
    dbobj = DBHandler()
    task_obj = dbobj.getActionInfoObj(task_id)
    # Always False here: the message below is written before any revoke
    # attempt is made.
    killed_status = False
    # 'FAILURE' is the state name Celery reports for a failed task; the
    # original list only had 'FAILED', which is kept for compatibility.
    celery_stop_task_status = ['REVOKED', 'SUCCESS', 'FAILED', 'FAILURE']

    dbobj.updateActionInfoEndTime(task_id)
    dbobj.setActionInfoState(task_id, kill_action.upper(), reason)
    username = user.username if user else "anonymous_user"
    killing_logs = "task is %s by %s, reason %s, killed_status %s" % (
        kill_action.upper(), username, reason, killed_status)
    ELKHandler(task_id).write_logs(exception=killing_logs)

    try:
        for task in (task_obj.task_ids or "").split(","):
            # The original compared the bound method ``str.strip`` with ""
            # (missing call parentheses), so blank ids were never skipped.
            task = task.strip()
            if not task:
                continue

            task_killed_status = False
            response = None
            for retry_count in range(3, 0, -1):
                logger.info(
                    "Revoking Task Id - %s for Action Task Id - %s, Trying - %s"
                    % (task, task_id, retry_count))
                app.control.revoke(task, terminate=True)
                time.sleep(1)
                response = AsyncResult(task)
                if response.status.upper() in celery_stop_task_status:
                    task_killed_status = True
                    logger.info(
                        "Task ID - %s has been revoked with status - %s and state - %s"
                        % (task, response.status, response.state))
                    break

            if not task_killed_status:
                exception = "Unable to Revoke on Action Info Id - %s celery task id - %s with state %s" % (
                    task_id, task, response.state)
                ELKHandler(task_id).write_logs(exception=exception)
    except Exception as e:
        # Best effort: the action is already marked as killed above.
        logger.exception(e)
Beispiel #5
0
 def __init__(self, task_obj=None, params=None):
     """Bind this object to a task and set up its per-run state.

     Args:
         task_obj: task whose ``id`` is handed to ``ELKHandler`` and whose
             ``config_dict()`` becomes ``self.config``. It is dereferenced
             unconditionally, so it must not be None despite the default.
         params: optional parameters; an empty dict is used when None.
     """
     if params is None:
         params = {}
     self.params = params
     self.es = ELKHandler(task_obj.id)
     self.task_obj = task_obj
     self.config = task_obj.config_dict()
     # Starts empty; nothing in this constructor fills it.
     self.failed_server_list = list()
Beispiel #6
0
    def post(self, request, space_name=None, id=None):
        """Return the per-server task details of an action as table JSON.

        The payload has the keys ``draw``, ``recordsTotal``,
        ``recordsFiltered`` and ``data``; ``data`` holds one row per server
        of the action, each with the detailed ELK entries recorded for that
        server.

        Args:
            request: incoming HTTP request (not read by this method).
            space_name: not used by this method.
            id: primary key of the ``ActionInfo``; without it the table is
                empty.

        Returns:
            HttpResponse: the JSON payload. On any error the exception is
            logged and the empty default payload is returned.
        """
        self.d = {}
        self.data = {
            "draw": 1,
            "recordsTotal": 0,
            "recordsFiltered": 0,
            "data": []
        }

        def _clean_time(value):
            # Drop everything after the first '.' and turn the 'T' separator
            # into a space. Missing values pass through unchanged instead of
            # raising and emptying the whole table.
            if not value:
                return value
            return value.split('.')[0].replace('T', ' ')

        # Bound up-front: the original only assigned it inside ``if id`` and
        # relied on the resulting NameError being caught (and logged).
        data_table_info = []

        try:
            if id:
                action_info = ActionInfo.objects.get(id=id)
                es = ELKHandler(id)
                detailed_entries = es.read_action_entries(detailed=True) or []

                # Group once by server instead of rescanning every entry for
                # every server; the order within a server is preserved.
                entries_by_server = {}
                for entry in detailed_entries:
                    entries_by_server.setdefault(entry.get('server'),
                                                 []).append(entry)

                for server in action_info.server_list():
                    details = []
                    for entry in entries_by_server.get(server, []):
                        details.append({
                            entry.get('task'): {
                                'status': entry.get('status'),
                                'duration': entry.get('duration'),
                                'batch_count': entry.get('batch_count'),
                                'description': entry.get('description'),
                                'staggered_label':
                                entry.get('staggered_label'),
                                'method': entry.get('method'),
                                'exception': entry.get('exception'),
                                'session': entry.get('session'),
                                'timestamp':
                                _clean_time(entry.get('timestamp')),
                                'starttime':
                                _clean_time(entry.get('starttime')),
                                'endtime': _clean_time(entry.get('endtime'))
                            }
                        })

                    data_table_info.append({
                        'server': server,
                        'details': details
                    })

            self.data['data'] = data_table_info
            self.data['recordsTotal'] = len(data_table_info)
            self.data['recordsFiltered'] = len(data_table_info)

        except Exception as e:
            self.d['error'] = e
            logger.exception(e)

        return HttpResponse(json.dumps(self.data),
                            content_type='application/json')
Beispiel #7
0
            error_message = "%s-%s" % (
                "Unable to parse response values from Canary Response", str(e))
            logger.error(error_message)
            return JsonResponse(
                {
                    'status': False,
                    'code': 400,
                    'error': error_message
                },
                status=400)

        try:
            # We don't need to raise Exception here, need to fix
            if action_info_obj.status.name.upper() in STOP_TASK_STATUS:
                message = "Action Info Task Id - %s has already been completed" % task_id
                ELKHandler(task_id).write_logs(exception=message)
                return JsonResponse(
                    {
                        'status': False,
                        'code': 400,
                        'error': message
                    },
                    status=200)
            edge_cache_obj = EdgeCache(task_id, zone)
            data = edge_cache_obj.get()
            if data is None or len(data) == 0:
                raise Exception(
                    "Cache Miss - Data for task_id - %s not found in Cache" %
                    task_id)
            current_staggered_label = data.get('current_staggered_label')
            if task_status is not True:
Beispiel #8
0
def action(task_id):
    """Schedule the per-zone work for the ActionInfo identified by ``task_id``.

    The action must be in the ``BUILDING`` state. It is moved to
    ``SCHEDULED`` and then, depending on the ``zone_parallelazation``
    config flag, either

    * all zones are dispatched at once (a chord when there are several
      zones, a single ``action_zone`` task when there is exactly one), or
    * only one zone from ``zone_serialization_order`` is dispatched; the
      cursor (``current_serialize_zone`` / ``next_serialize_zone``) is read
      from and written back to ``EdgeCache``.

    Afterwards a Jira update and a notification e-mail are attempted; their
    failures are logged and do not fail the action. Any other exception
    marks the action ``FAILED``, stamps its end time and writes the error
    through ``ELKHandler``.

    Args:
        task_id: primary key of the ``ActionInfo`` row to start.

    Returns:
        None.
    """
    # Created outside the ``try`` so the failure handler below can always
    # rely on it being bound.
    dbobj = DBHandler()
    try:
        action_info = ActionInfo.objects.get(id=task_id)
        if action_info.status.name != "BUILDING":
            message = "task [%s] is either completed or not scheduled, status - %s. Can't start it." % (
                task_id, action_info.status.name)
            raise Exception(message)

        action_info.update_status('SCHEDULED')
        action_info.update_start_time()

        logger.info("ActionInfo[%s], status - %s" %
                    (action_info.id, action_info.status.name))
        configs = action_info.config_dict()
        # key Name from which server details is available
        server_info = configs.get('server_info')
        server_info_dict = configs.get(server_info, {})

        # Actual servers on which action need to be taken
        servers = separate_servers_by_zone(action_info.server_list())
        script_name = action_info.script_file_name
        zone_list = [{
            'zone': zone,
            server_info: server_info_dict.get(zone),
            'servers': servers.get(zone)
        } for zone in servers if servers.get(zone)]

        logger.debug("server_info_dict - %s zone_list[%s]- %s" %
                     (server_info_dict, len(zone_list), zone_list))
        zone_parallelazation = configs.get('zone_parallelazation', True)
        if zone_parallelazation is True:
            if len(zone_list) > 1:
                res = chord([
                    action_zone.s(action_info.id, serverdetail, script_name)
                    for serverdetail in zone_list
                ])(closure_task.s(task_id=task_id).on_error(
                    closure_error_task.s(task_id=task_id)))
                for taskids in res.parent.children:
                    dbobj.updateActionInfoTaskIds(action_info, taskids.task_id)
            elif len(zone_list) == 1:
                res = action_zone.apply_async(
                    (action_info.id, zone_list[0], script_name),
                    link=closure_task.s(task_id=task_id),
                    link_error=closure_error_task.s(task_id=task_id))
                dbobj.updateActionInfoTaskIds(action_info, res.task_id)
            else:
                raise Exception("Unable to capture zone info from task- %s" %
                                task_id)
        else:
            zone_serialization_order = configs.get('zone_serialization_order',
                                                   [])
            edge_cache = EdgeCache(task_id)
            # Read the cache once and tolerate an empty/None result.
            cached = edge_cache.get() or {}
            current_serialize_zone = cached.get('current_serialize_zone', None)
            next_serialize_zone = cached.get('next_serialize_zone', None)

            if current_serialize_zone is None and next_serialize_zone is None:
                # It means it is first zone deployment.
                current_serialize_zone = zone_serialization_order[0] if len(
                    zone_serialization_order) > 0 else None
                next_serialize_zone = zone_serialization_order[1] if len(
                    zone_serialization_order) > 1 else None
                edge_cache.set(next_serialize_zone=next_serialize_zone,
                               current_serialize_zone=current_serialize_zone)
            elif next_serialize_zone is not None:
                current_serialize_zone = next_serialize_zone
                next_serialize_zone_index = zone_serialization_order.index(
                    current_serialize_zone) + 1
                # Store the zone NAME (the original stored the integer
                # index) and allow the last element of the order to be
                # selected (the original bound check was off by one, so the
                # final zone was never scheduled).
                if next_serialize_zone_index < len(zone_serialization_order):
                    next_serialize_zone = zone_serialization_order[
                        next_serialize_zone_index]
                else:
                    next_serialize_zone = None
                edge_cache.set(next_serialize_zone=next_serialize_zone,
                               current_serialize_zone=current_serialize_zone)
            else:
                logger.info(
                    "deployment for all zone - %s has been completed." %
                    zone_serialization_order)
                action_info.update_status('COMPLETED')
                return
            if current_serialize_zone is not None:
                zone_configuration = [
                    zone_detail for zone_detail in zone_list
                    if zone_detail.get('zone') == current_serialize_zone
                ]
                if len(zone_configuration) > 0:
                    res = action_zone.apply_async(
                        (action_info.id, zone_configuration[0], script_name),
                        link=closure_task.s(task_id=task_id),
                        link_error=closure_error_task.s(task_id=task_id))
                    dbobj.updateActionInfoTaskIds(action_info, res.id)
                    ELKHandler(task_id).write_logs(
                        exception="Scheduling task for zone - %s" %
                        current_serialize_zone,
                        level='info')
                else:
                    message = "No Zone Information Configured for zone - %s" % current_serialize_zone
                    ELKHandler(task_id).write_logs(exception=message,
                                                   level="warn")
                    # Not raising: there might be no servers selected for
                    # this zone, so the closure is invoked directly.
                    closure_task(task_id=task_id)
        edge_cache = EdgeCache(task_id)
        zone = [zone_detail.get('zone') for zone_detail in zone_list]
        edge_cache.set(zone=zone)
        try:
            # Update Status in Jira
            email_dict = {}
            jira_id = configs.get('jira_id', None)
            space_name = dbobj.getSpaceName(action_info.project.name)
            host_uri = dbobj.getValue("edge_host")
            uri = "%s%s" % (host_uri,
                            reverse('action', args=(space_name, task_id)))
            message = "Started - %s at Edge Task Id - %s, check status : - %s" % (
                action_info.action.name, task_id, uri)
            if jira_id is not None and action_info.action.name.lower(
            ) == "deploy":
                jira_obj = Jira(jira_id)
                jira_obj.update_transition("InProgress")
                jira_obj.comment(message)

            if configs.get('notify_me', False):

                if configs.get('notify_action',
                               'all') in [action_info.action.name, 'all']:

                    reason = configs.get('description', None)
                    email_dict['reason'] = reason
                    email_dict['action'] = action_info.action.name
                    email_dict['task_id'] = task_id
                    email_dict['uri'] = uri
                    email_dict['user'] = action_info.user.first_name

                    recipients = dbobj.getSpaceDLs(action_info.project.name)
                    cc = []
                    subject = 'Edge notify for Project - %s | Action -%s | Task Id - [#%s]' % (
                        action_info.project.name, action_info.action.name,
                        task_id)
                    # NOTE(review): if ``render`` is django.shortcuts.render
                    # it expects a request as first argument and returns an
                    # HttpResponse; a template-to-string helper may have
                    # been intended - confirm which ``render`` is imported.
                    result = render('report/templates/email_msg.html',
                                    email_dict)
                    email_from = 'edge_' + action_info.action.name + '@makemytrip.com'
                    edge_email = EmailMessage(subject,
                                              result,
                                              email_from,
                                              recipients,
                                              cc=cc)
                    edge_email.content_subtype = "html"
                    edge_email.send(fail_silently=False)

        except Exception as e:
            # Notifications are best effort; scheduling already succeeded.
            logger.error(e)
    except Exception as e:
        logger.exception(e)
        dbobj.setActionInfoState(task_id, 'FAILED')
        dbobj.updateActionInfoEndTime(task_id)
        # Send Error to ELK
        ELKHandler(task_id).write_logs(exception=str(e))