Ejemplo n.º 1
0
    def patch(self, request, pk, agentid):
        """Store the result of a task run reported by an agent.

        The payload in ``request.data`` is applied to the task as a partial
        update through ``TaskRunnerPatchSerializer`` and ``last_run`` is set
        to the current time.  The task status is then derived from the return
        code, the matching alert is raised or resolved, and an audit log
        entry is written.

        ``agentid`` and ``pk`` are looked up with ``get_object_or_404``, and
        an invalid payload raises from ``is_valid(raise_exception=True)``.
        Returns a ``Response`` whose body is ``"ok"``.
        """
        from alerts.models import Alert
        from logs.models import AuditLog

        agent = get_object_or_404(Agent, agent_id=agentid)
        task = get_object_or_404(AutomatedTask, pk=pk)

        patcher = TaskRunnerPatchSerializer(instance=task, data=request.data, partial=True)
        patcher.is_valid(raise_exception=True)
        patcher.save(last_run=djangotime.now())

        # only a zero return code counts as a pass
        outcome = "passing" if task.retcode == 0 else "failing"

        # the status is written onto a freshly loaded copy of the task row
        refreshed = AutomatedTask.objects.get(pk=task.pk)
        refreshed.status = outcome
        refreshed.save()

        if outcome == "failing":
            Alert.handle_alert_failure(refreshed)
        elif Alert.objects.filter(assigned_task=refreshed, resolved=False).exists():
            # only resolve when there is an open alert for this task
            Alert.handle_alert_resolve(refreshed)

        hostname = agent.hostname
        AuditLog.objects.create(
            username=hostname,
            agent=hostname,
            object_type="agent",
            action="task_run",
            message=f"Scheduled Task {task.name} was run on {hostname}",
            after_value=AutomatedTask.serialize(refreshed),
        )

        return Response("ok")
Ejemplo n.º 2
0
def agent_outages_task() -> None:
    """Hand every agent whose ``status`` is ``"overdue"`` to the alert handler.

    Only the columns listed in ``loaded_fields`` are requested from the
    database; ``Alert.handle_alert_failure`` is called once per overdue agent.
    """
    from alerts.models import Alert

    loaded_fields = (
        "pk",
        "last_seen",
        "offline_time",
        "overdue_time",
        "overdue_email_alert",
        "overdue_text_alert",
        "overdue_dashboard_alert",
    )

    for agent in Agent.objects.only(*loaded_fields):
        if agent.status != "overdue":
            continue
        Alert.handle_alert_failure(agent)
Ejemplo n.º 3
0
    def handle_check(self, data):
        """Apply a check result reported by an agent and update alert state.

        The branch taken depends on ``self.check_type``.  Each branch reads
        the keys of ``data`` it needs, sets ``self.status`` (plus
        ``self.alert_severity`` for threshold and script checks), saves the
        fields it changed and records a check history entry.  Afterwards the
        failure counter is updated and an alert is raised or resolved.

        Keys read from ``data`` per check type:
            cpuload / memory: ``percent``
            diskspace: ``exists`` and, when true, ``percent_used``, ``more_info``
            script: ``stdout``, ``stderr``, ``retcode``, ``runtime``
            ping: ``status``, ``output``
            winsvc: ``status``, ``more_info``
            eventlog: ``log``

        Returns:
            The resulting ``self.status`` value.
        """
        from alerts.models import Alert

        kind = self.check_type

        # cpu load and memory checks are judged on a rolling average
        if kind in ("cpuload", "memory"):
            self.history.append(data["percent"])

            # keep only the 15 most recent samples
            if len(self.history) > 15:
                self.history = self.history[-15:]

            self.save(update_fields=["history"])

            average = int(mean(self.history))

            if self.error_threshold and average > self.error_threshold:
                self.status, self.alert_severity = "failing", "error"
            elif self.warning_threshold and average > self.warning_threshold:
                self.status, self.alert_severity = "failing", "warning"
            else:
                self.status = "passing"

            # history stores the raw sample, not the average
            self.add_check_history(data["percent"])

        # disk space checks compare free percentage against the thresholds
        elif kind == "diskspace":
            if not data["exists"]:
                self.status, self.alert_severity = "failing", "error"
                self.more_info = f"Disk {self.disk} does not exist"
            else:
                free_percent = 100 - round(data["percent_used"])

                if self.error_threshold and free_percent < self.error_threshold:
                    self.status, self.alert_severity = "failing", "error"
                elif self.warning_threshold and free_percent < self.warning_threshold:
                    self.status, self.alert_severity = "failing", "warning"
                else:
                    self.status = "passing"

                self.more_info = data["more_info"]
                self.add_check_history(free_percent)

            self.save(update_fields=["more_info"])

        # script checks are judged on the return code
        elif kind == "script":
            self.stdout = data["stdout"]
            self.stderr = data["stderr"]
            self.retcode = data["retcode"]
            self.execution_time = f"{data['runtime']:.4f}"

            # info and warning return codes take precedence over the
            # generic non-zero failure
            if data["retcode"] in self.info_return_codes:
                self.alert_severity, self.status = "info", "failing"
            elif data["retcode"] in self.warning_return_codes:
                self.alert_severity, self.status = "warning", "failing"
            elif data["retcode"] != 0:
                self.status, self.alert_severity = "failing", "error"
            else:
                self.status = "passing"

            self.save(update_fields=["stdout", "stderr", "retcode", "execution_time"])

            script_summary = {
                "retcode": data["retcode"],
                "stdout": data["stdout"][:60],
                "stderr": data["stderr"][:60],
                "execution_time": self.execution_time,
            }
            self.add_check_history(
                1 if self.status == "failing" else 0,
                script_summary,
            )

        # ping and windows service checks take the status as reported and
        # differ only in the key that carries the detail text
        elif kind in ("ping", "winsvc"):
            detail_key = "output" if kind == "ping" else "more_info"
            self.status = data["status"]
            self.more_info = data[detail_key]
            self.save(update_fields=["more_info"])

            self.add_check_history(
                1 if self.status == "failing" else 0,
                self.more_info[:60],
            )

        # event log checks count the events returned by the agent
        elif kind == "eventlog":
            events = data["log"]

            # any other fail_when value leaves the status untouched
            if self.fail_when in ("contains", "not_contains"):
                threshold_met = bool(
                    events and len(events) >= self.number_of_events_b4_alert
                )
                if self.fail_when == "contains":
                    self.status = "failing" if threshold_met else "passing"
                else:
                    self.status = "passing" if threshold_met else "failing"

            self.extra_details = {"log": events}
            self.save(update_fields=["extra_details"])

            self.add_check_history(
                1 if self.status == "failing" else 0,
                "Events Found:" + str(len(self.extra_details["log"])),
            )

        # update the failure counter and raise or resolve the alert
        persisted = ["status", "fail_count", "alert_severity"]
        if self.status == "failing":
            self.fail_count += 1
            self.save(update_fields=persisted)

            if self.fail_count >= self.fails_b4_alert:
                Alert.handle_alert_failure(self)

        elif self.status == "passing":
            self.fail_count = 0
            self.save(update_fields=persisted)

            # only resolve when there is an open alert for this check
            if Alert.objects.filter(assigned_check=self, resolved=False).exists():
                Alert.handle_alert_resolve(self)

        return self.status
Ejemplo n.º 4
0
    def patch(self, request, pk, agentid):
        """Store the result of a task run reported by an agent.

        The payload in ``request.data`` is applied to the task as a partial
        update through ``TaskRunnerPatchSerializer`` and ``last_run`` is set
        to the current time.

        When the task has a ``custom_field`` it is treated as a collector:
        if ``stderr`` is empty, the last line of ``stdout`` is written to the
        agent's custom field and the task passes; otherwise it fails.  Tasks
        without a custom field pass only when the return code is zero.

        The matching alert is then raised or resolved and an audit log entry
        is written.

        ``agentid`` and ``pk`` are looked up with ``get_object_or_404``, and
        an invalid payload raises from ``is_valid(raise_exception=True)``.
        Returns a ``Response`` whose body is ``"ok"``.
        """
        from alerts.models import Alert
        from logs.models import AuditLog

        agent = get_object_or_404(Agent, agent_id=agentid)
        task = get_object_or_404(AutomatedTask, pk=pk)

        serializer = TaskRunnerPatchSerializer(instance=task,
                                               data=request.data,
                                               partial=True)
        serializer.is_valid(raise_exception=True)
        new_task = serializer.save(last_run=djangotime.now())

        # check if task is a collector and update the custom field
        if task.custom_field:
            if not task.stderr:
                # get_or_create replaces the separate exists()/get()/create()
                # calls: one lookup instead of two, and no window in which a
                # concurrent request can create the row between check and
                # create
                agent_field, _ = AgentCustomField.objects.get_or_create(
                    field=task.custom_field, agent=task.agent)

                # get last line of stdout
                value = new_task.stdout.split("\n")[-1].strip()

                field_type = task.custom_field.type
                if field_type in ("text", "number", "single", "datetime"):
                    agent_field.string_value = value
                    agent_field.save()
                elif field_type == "multiple":
                    agent_field.multiple_value = value.split(",")
                    agent_field.save()
                elif field_type == "checkbox":
                    # NOTE(review): bool() of a string is True for any
                    # non-empty text, including "False" and "0" - confirm
                    # this is the intended checkbox semantics
                    agent_field.bool_value = bool(value)
                    agent_field.save()

                status = "passing"
            else:
                status = "failing"
        else:
            status = "failing" if task.retcode != 0 else "passing"

        new_task.status = status
        new_task.save()

        if status == "passing":
            # only resolve when there is an open alert for this task
            if Alert.objects.filter(assigned_task=new_task,
                                    resolved=False).exists():
                Alert.handle_alert_resolve(new_task)
        else:
            Alert.handle_alert_failure(new_task)

        AuditLog.objects.create(
            username=agent.hostname,
            agent=agent.hostname,
            object_type="agent",
            action="task_run",
            message=f"Scheduled Task {task.name} was run on {agent.hostname}",
            after_value=AutomatedTask.serialize(new_task),
        )

        return Response("ok")
Ejemplo n.º 5
0
    def handle_checkv2(self, data):
        """Evaluate a check result sent by an agent and update alert state.

        The branch taken depends on ``self.check_type``.  Each branch reads
        the keys of ``data`` it needs, sets ``self.status`` (plus
        ``self.alert_severity`` for threshold and script checks), saves the
        fields it changed and records a check history entry.  Afterwards the
        failure counter is updated and an alert is raised or resolved.

        Keys read from ``data`` per check type:
            cpuload / memory: ``percent``
            diskspace: ``exists`` and, when true, ``percent_used``,
                ``total``, ``free``
            script: ``stdout``, ``stderr``, ``retcode`` and either
                ``start``/``stop`` or ``runtime``
            ping: ``output``, ``has_stdout``, ``has_stderr``
            winsvc: ``status``, ``exists``
            eventlog: ``log``, an iterable of dicts with ``eventType``,
                ``eventID``, ``source`` and ``message``

        Returns:
            The resulting ``self.status`` value.
        """
        from alerts.models import Alert

        # cpuload or mem checks
        if self.check_type in ("cpuload", "memory"):

            self.history.append(data["percent"])

            # keep only the 15 most recent samples
            if len(self.history) > 15:
                self.history = self.history[-15:]

            self.save(update_fields=["history"])

            # thresholds are compared against the rolling average
            avg = int(mean(self.history))

            if self.error_threshold and avg > self.error_threshold:
                self.status = "failing"
                self.alert_severity = "error"
            elif self.warning_threshold and avg > self.warning_threshold:
                self.status = "failing"
                self.alert_severity = "warning"
            else:
                self.status = "passing"

            # add check history
            self.add_check_history(data["percent"])

        # diskspace checks
        elif self.check_type == "diskspace":
            if data["exists"]:
                percent_used = round(data["percent_used"])
                total = bytes2human(data["total"])
                free = bytes2human(data["free"])

                # thresholds are expressed as percent free
                if self.error_threshold and (
                        100 - percent_used) < self.error_threshold:
                    self.status = "failing"
                    self.alert_severity = "error"
                elif (self.warning_threshold
                      and (100 - percent_used) < self.warning_threshold):
                    self.status = "failing"
                    self.alert_severity = "warning"

                else:
                    self.status = "passing"

                self.more_info = f"Total: {total}B, Free: {free}B"

                # add check history
                self.add_check_history(100 - percent_used)
            else:
                self.status = "failing"
                self.alert_severity = "error"
                self.more_info = f"Disk {self.disk} does not exist"

            self.save(update_fields=["more_info"])

        # script checks
        elif self.check_type == "script":
            self.stdout = data["stdout"]
            self.stderr = data["stderr"]
            self.retcode = data["retcode"]
            try:
                # python agent
                self.execution_time = "{:.4f}".format(data["stop"] -
                                                      data["start"])
            except (KeyError, TypeError, ValueError):
                # golang agent
                # Only missing or unusable start/stop values trigger the
                # fallback; a bare except would also have swallowed
                # KeyboardInterrupt and SystemExit.
                self.execution_time = "{:.4f}".format(data["runtime"])

            # info and warning return codes take precedence over the
            # generic non-zero failure
            if data["retcode"] in self.info_return_codes:
                self.alert_severity = "info"
                self.status = "failing"
            elif data["retcode"] in self.warning_return_codes:
                self.alert_severity = "warning"
                self.status = "failing"
            elif data["retcode"] != 0:
                self.status = "failing"
                self.alert_severity = "error"
            else:
                self.status = "passing"

            self.save(update_fields=[
                "stdout",
                "stderr",
                "retcode",
                "execution_time",
            ])

            # add check history
            self.add_check_history(
                1 if self.status == "failing" else 0,
                {
                    "retcode": data["retcode"],
                    "stdout": data["stdout"][:60],
                    "stderr": data["stderr"][:60],
                    "execution_time": self.execution_time,
                },
            )

        # ping checks
        elif self.check_type == "ping":
            # every one of these words must appear in the output to pass
            success = ["Reply", "bytes", "time", "TTL"]
            output = data["output"]

            # when neither flag is set the previous status is kept
            if data["has_stdout"]:
                if all(x in output for x in success):
                    self.status = "passing"
                else:
                    self.status = "failing"
            elif data["has_stderr"]:
                self.status = "failing"

            self.more_info = output
            self.save(update_fields=["more_info"])

            self.add_check_history(1 if self.status == "failing" else 0,
                                   self.more_info[:60])

        # windows service checks
        elif self.check_type == "winsvc":
            svc_stat = data["status"]
            self.more_info = f"Status {svc_stat.upper()}"

            if data["exists"]:
                if svc_stat == "running":
                    self.status = "passing"
                elif svc_stat == "start_pending" and self.pass_if_start_pending:
                    self.status = "passing"
                else:
                    if self.agent and self.restart_if_stopped:
                        # ask the agent to start the service and judge the
                        # check on the outcome of that request
                        nats_data = {
                            "func": "winsvcaction",
                            "payload": {
                                "name": self.svc_name,
                                "action": "start"
                            },
                        }
                        r = asyncio.run(
                            self.agent.nats_cmd(nats_data, timeout=32))
                        if r in ("timeout", "natsdown"):
                            self.status = "failing"
                        elif not r["success"] and r["errormsg"]:
                            self.status = "failing"
                        elif r["success"]:
                            self.status = "passing"
                            self.more_info = "Status RUNNING"
                        else:
                            self.status = "failing"
                    else:
                        self.status = "failing"

            else:
                if self.pass_if_svc_not_exist:
                    self.status = "passing"
                else:
                    self.status = "failing"

                self.more_info = f"Service {self.svc_name} does not exist"

            self.save(update_fields=["more_info"])

            self.add_check_history(1 if self.status == "failing" else 0,
                                   self.more_info[:60])

        # event log checks
        elif self.check_type == "eventlog":
            log = []
            is_wildcard = self.event_id_is_wildcard
            event_type = self.event_type
            event_id = self.event_id
            source = self.event_source
            message = self.event_message

            for event in data["log"]:
                if event["eventType"] != event_type:
                    continue

                # a wildcard matches any event ID
                if not is_wildcard and int(event["eventID"]) != event_id:
                    continue

                # the source and message filters are optional; when set,
                # each must be a substring of the event's field
                if source and source not in event["source"]:
                    continue
                if message and message not in event["message"]:
                    continue

                log.append(event)

            # any other fail_when value leaves the status untouched
            if self.fail_when == "contains":
                if log:
                    self.status = "failing"
                else:
                    self.status = "passing"

            elif self.fail_when == "not_contains":
                if log:
                    self.status = "passing"
                else:
                    self.status = "failing"

            self.extra_details = {"log": log}
            self.save(update_fields=["extra_details"])

            self.add_check_history(
                1 if self.status == "failing" else 0,
                "Events Found:" + str(len(self.extra_details["log"])),
            )

        # handle status
        if self.status == "failing":
            self.fail_count += 1
            self.save(update_fields=["status", "fail_count", "alert_severity"])

            if self.fail_count >= self.fails_b4_alert:
                Alert.handle_alert_failure(self)

        elif self.status == "passing":
            self.fail_count = 0
            self.save(update_fields=["status", "fail_count", "alert_severity"])
            # only resolve when there is an open alert for this check
            if Alert.objects.filter(assigned_check=self,
                                    resolved=False).exists():
                Alert.handle_alert_resolve(self)

        return self.status