def test_mark_nodes_failed_after_missing_timeout_heartbeat(self):
        """Fail a node whose script set last pinged 11 minutes ago.

        Expects the node to move to ``self.failed_status``, the "not heard
        from" message to be stored on the node and logged, and every
        pending script result to end up ``TIMEDOUT``.
        """
        node, script_set = self.make_node()
        script_set.last_ping = datetime.now() - timedelta(minutes=11)
        script_set.save()
        script_results = [
            factory.make_ScriptResult(
                script_set=script_set, status=SCRIPT_STATUS.PENDING)
            for _ in range(3)
        ]

        mark_nodes_failed_after_missing_script_timeout()
        node = reload_object(node)

        self.assertEqual(self.failed_status, node.status)
        self.assertEqual(
            "Node has not been heard from for the last 10 minutes",
            node.error_description)
        self.assertIn(
            call(
                "%s: Has not been heard from for the last 10 minutes" %
                node.hostname),
            self.maaslog.call_args_list)
        # The node is only expected to be stopped when SSH is disabled.
        if node.enable_ssh:
            self.assertThat(self.mock_stop, MockNotCalled())
        else:
            self.assertThat(self.mock_stop, MockCalledOnce())
            self.assertIn(
                call("%s: Stopped because SSH is disabled" % node.hostname),
                self.maaslog.call_args_list)
        for script_result in script_results:
            self.assertEqual(
                SCRIPT_STATUS.TIMEDOUT, reload_object(script_result).status)
    def test_uses_param_runtime(self):
        """A ``runtime`` parameter keeps a long-running script from timing out.

        The running script started 50 minutes ago and its script timeout is
        2 minutes, but its ``runtime`` parameter is ``60 * 60``. The test
        expects the node status and all script result statuses to be left
        unchanged and the node not to be stopped.
        """
        node, script_set = self.make_node()
        current_time = datetime.now()
        script_set.last_ping = current_time
        script_set.save()
        passed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PASSED)
        failed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.FAILED)
        pending_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING)
        script = factory.make_Script(timeout=timedelta(minutes=2))
        running_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.RUNNING, script=script,
            started=current_time - timedelta(minutes=50),
            parameters={'runtime': {
                'type': 'runtime',
                'value': 60 * 60,
                }})

        mark_nodes_failed_after_missing_script_timeout()
        node = reload_object(node)

        self.assertEqual(self.status, node.status)
        self.assertThat(self.mock_stop, MockNotCalled())
        self.assertEqual(
            SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status)
        self.assertEqual(
            SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status)
        self.assertEqual(
            SCRIPT_STATUS.PENDING,
            reload_object(pending_script_result).status)
        self.assertEqual(
            SCRIPT_STATUS.RUNNING,
            reload_object(running_script_result).status)
Exemple #3
0
    def test_mark_nodes_failed_after_builtin_commiss_script_overrun(self):
        """Fail commissioning when a builtin commissioning script overruns.

        One result from the generated commissioning script set is marked
        ``RUNNING`` with a start time 10 minutes in the past. The test
        expects the node to become ``FAILED_COMMISSIONING``, the overrun
        message (using the timeout from ``NODE_INFO_SCRIPTS``) to be stored
        and logged, the running result to become ``TIMEDOUT`` and the
        untouched results to become ``ABORTED``.
        """
        user = factory.make_admin()
        node = factory.make_Node(status=NODE_STATUS.COMMISSIONING, owner=user)
        script_set = ScriptSet.objects.create_commissioning_script_set(node)
        node.current_commissioning_script_set = script_set
        node.save()
        current_time = now()
        script_set.last_ping = current_time
        script_set.save()
        # Take three results off the set and give them explicit statuses;
        # whatever remains in the list keeps its original status.
        pending_script_results = list(script_set.scriptresult_set.all())
        passed_script_result = pending_script_results.pop()
        passed_script_result.status = SCRIPT_STATUS.PASSED
        passed_script_result.save()
        failed_script_result = pending_script_results.pop()
        failed_script_result.status = SCRIPT_STATUS.FAILED
        failed_script_result.save()
        running_script_result = pending_script_results.pop()
        running_script_result.status = SCRIPT_STATUS.RUNNING
        running_script_result.started = current_time - timedelta(minutes=10)
        running_script_result.save()

        mark_nodes_failed_after_missing_script_timeout(current_time, 20)
        node = reload_object(node)

        # The timeout text appears in both the stored and the logged message.
        script_name = running_script_result.name
        timeout = str(NODE_INFO_SCRIPTS[script_name]["timeout"])

        self.assertEqual(NODE_STATUS.FAILED_COMMISSIONING, node.status)
        self.assertEqual(
            "%s has run past it's timeout(%s)" % (script_name, timeout),
            node.error_description,
        )
        self.assertIn(
            call("%s: %s has run past it's timeout(%s)" % (
                node.hostname,
                script_name,
                timeout,
            )),
            self.maaslog.call_args_list,
        )
        # The node is only expected to be stopped when SSH is disabled.
        if node.enable_ssh:
            self.assertThat(self.mock_stop, MockNotCalled())
        else:
            self.assertThat(self.mock_stop, MockCalledOnce())
            self.assertIn(
                call("%s: Stopped because SSH is disabled" % node.hostname),
                self.maaslog.call_args_list,
            )
        self.assertEqual(SCRIPT_STATUS.PASSED,
                         reload_object(passed_script_result).status)
        self.assertEqual(SCRIPT_STATUS.FAILED,
                         reload_object(failed_script_result).status)
        self.assertEqual(SCRIPT_STATUS.TIMEDOUT,
                         reload_object(running_script_result).status)
        for script_result in pending_script_results:
            self.assertEqual(SCRIPT_STATUS.ABORTED,
                             reload_object(script_result).status)
    def test_mark_nodes_failed_after_script_overrun(self):
        """Fail a node whose running script has exceeded its timeout.

        The running script has a 60 second timeout but started 10 minutes
        ago. The test expects the node to move to ``self.failed_status``,
        the overrun message to be stored and logged, the running result to
        become ``TIMEDOUT``, the pending result to become ``ABORTED`` and
        the already finished results to be left alone.
        """
        node, script_set = self.make_node()
        current_time = now()
        script_set.last_ping = current_time
        script_set.save()
        passed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PASSED)
        failed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.FAILED)
        pending_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING)
        script = factory.make_Script(timeout=timedelta(seconds=60))
        running_script_result = factory.make_ScriptResult(
            script_set=script_set,
            status=SCRIPT_STATUS.RUNNING,
            script=script,
            started=current_time - timedelta(minutes=10),
        )

        mark_nodes_failed_after_missing_script_timeout(current_time, 20)
        node = reload_object(node)

        # The timeout text appears in both the stored and the logged message.
        script_name = running_script_result.name
        timeout = str(running_script_result.script.timeout)

        self.assertEqual(self.failed_status, node.status)
        self.assertEqual(
            "%s has run past it's timeout(%s)" % (script_name, timeout),
            node.error_description,
        )
        self.assertIn(
            call("%s: %s has run past it's timeout(%s)" % (
                node.hostname,
                script_name,
                timeout,
            )),
            self.maaslog.call_args_list,
        )
        # The node is only expected to be stopped when SSH is disabled.
        if node.enable_ssh:
            self.assertThat(self.mock_stop, MockNotCalled())
        else:
            self.assertThat(self.mock_stop, MockCalledOnce())
            self.assertIn(
                call("%s: Stopped because SSH is disabled" % node.hostname),
                self.maaslog.call_args_list,
            )
        self.assertEqual(SCRIPT_STATUS.PASSED,
                         reload_object(passed_script_result).status)
        self.assertEqual(SCRIPT_STATUS.FAILED,
                         reload_object(failed_script_result).status)
        self.assertEqual(SCRIPT_STATUS.ABORTED,
                         reload_object(pending_script_result).status)
        self.assertEqual(SCRIPT_STATUS.TIMEDOUT,
                         reload_object(running_script_result).status)
Exemple #5
0
    def test_mark_nodes_handled_last_ping_None(self):
        """A script set with ``last_ping`` of ``None`` is handled cleanly.

        The call must not raise, and the node is expected to keep
        ``self.status``.
        """
        node, script_set = self.make_node()
        script_set.last_ping = None
        script_set.save()
        for _ in range(3):
            factory.make_ScriptResult(script_set=script_set,
                                      status=SCRIPT_STATUS.PENDING)

        # No exception should be raised.
        mark_nodes_failed_after_missing_script_timeout()
        node = reload_object(node)
        self.assertEqual(self.status, node.status)
    def test_mark_nodes_failed_after_missing_timeout_prefetches(self):
        """The query count does not grow with the number of nodes.

        Counts queries with one overrunning node, then again after adding
        six more, and expects 7 queries both times. ``Node.mark_failed`` is
        patched out so it does not take part in the measured calls.
        """
        self.patch(Node, "mark_failed")
        current_time = now()

        def make_overrunning_node():
            # A node whose running script (60 second timeout) started
            # 3 minutes before `current_time`.
            _, script_set = self.make_node()
            script_set.last_ping = current_time
            script_set.save()
            script = factory.make_Script(timeout=timedelta(seconds=60))
            factory.make_ScriptResult(
                script_set=script_set,
                status=SCRIPT_STATUS.RUNNING,
                script=script,
                started=current_time - timedelta(minutes=3),
            )

        make_overrunning_node()

        counter_one = CountQueries()
        with counter_one:
            mark_nodes_failed_after_missing_script_timeout(current_time, 20)

        for _ in range(6):
            make_overrunning_node()

        counter_many = CountQueries()
        with counter_many:
            mark_nodes_failed_after_missing_script_timeout(current_time, 20)

        # Lookup takes 7 queries no matter the number of Nodes
        # 1. Get all Nodes in commissioning or testing
        # 2. Get all commissioning ScriptSets
        # 3. Get all testing ScriptSets
        # 4. Get all commissioning ScriptResults
        # 5. Get all testing ScriptResults
        # 6. Get all commissioning Scripts
        # 7. Get all testing Scripts
        self.assertEqual(7, counter_one.num_queries)
        self.assertEqual(7, counter_many.num_queries)
    def test_sets_status_expires_when_flatlined_with_may_reboot_script(self):
        """A stale ping with a ``may_reboot`` script sets ``status_expires``.

        The running script is created with ``may_reboot=True`` and the last
        ping is 11 days old. The test expects ``node.status_expires`` to be
        the last ping plus the monitored-status timeout for ``self.status``.
        """
        node, script_set = self.make_node()
        current_time = datetime.now()
        if self.status == NODE_STATUS.COMMISSIONING:
            script_type = SCRIPT_TYPE.COMMISSIONING
        else:
            script_type = SCRIPT_TYPE.TESTING
        script = factory.make_Script(script_type=script_type, may_reboot=True)
        factory.make_ScriptResult(
            script=script, script_set=script_set, status=SCRIPT_STATUS.RUNNING)
        # timedelta's first positional argument is days; spell it out.
        script_set.last_ping = current_time - timedelta(days=11)
        script_set.save()

        mark_nodes_failed_after_missing_script_timeout()
        node = reload_object(node)

        self.assertEqual(
            current_time - (current_time - script_set.last_ping) + timedelta(
                minutes=NODE_FAILURE_MONITORED_STATUS_TIMEOUTS[self.status]),
            node.status_expires)
Exemple #8
0
    def test_sets_status_expires_when_flatlined_with_may_reboot_script(self):
        """A stale ping with a ``may_reboot`` script sets ``status_expires``.

        The running script is created with ``may_reboot=True`` and the last
        ping is 11 days old. The test expects ``node.status_expires`` to be
        the last ping plus ``get_node_timeout(self.status, 20)`` minutes.
        """
        node, script_set = self.make_node()
        current_time = now()
        if self.status == NODE_STATUS.COMMISSIONING:
            script_type = SCRIPT_TYPE.COMMISSIONING
        else:
            script_type = SCRIPT_TYPE.TESTING
        script = factory.make_Script(script_type=script_type, may_reboot=True)
        factory.make_ScriptResult(
            script=script, script_set=script_set, status=SCRIPT_STATUS.RUNNING)
        # timedelta's first positional argument is days; spell it out.
        script_set.last_ping = current_time - timedelta(days=11)
        script_set.save()

        mark_nodes_failed_after_missing_script_timeout(current_time, 20)
        node = reload_object(node)

        self.assertEqual(
            current_time - (current_time - script_set.last_ping) + timedelta(
                minutes=get_node_timeout(self.status, 20)),
            node.status_expires)
    def test_mark_nodes_failed_after_missing_timeout_prefetches(self):
        """Check the number of queries issued for three overrunning nodes.

        ``Node.mark_failed`` is patched out so it does not take part in the
        measured call. Each node has a running script with a 60 second
        timeout that started 3 minutes ago.
        """
        self.patch(Node, 'mark_failed')
        current_time = datetime.now()
        nodes = []
        for _ in range(3):
            node, script_set = self.make_node()
            script_set.last_ping = current_time
            script_set.save()
            script = factory.make_Script(timeout=timedelta(seconds=60))
            factory.make_ScriptResult(
                script_set=script_set, status=SCRIPT_STATUS.RUNNING,
                script=script, started=current_time - timedelta(minutes=3))
            nodes.append(node)

        counter = CountQueries()
        with counter:
            mark_nodes_failed_after_missing_script_timeout()
        # The initial lookup and prefetch take three queries. This is done
        # once to find which nodes are being tested and, on each node, which
        # scripts are currently running. The expected total additionally
        # allows two queries per node.
        self.assertEqual(3 + len(nodes) * 2, counter.num_queries)
    def test_uses_param_runtime(self):
        """A ``runtime`` parameter keeps a long-running script from timing out.

        The running script started 50 minutes ago and its script timeout is
        2 minutes, but its ``runtime`` parameter is ``60 * 60``. The test
        expects the node status and all script result statuses to be left
        unchanged and the node not to be stopped.
        """
        node, script_set = self.make_node()
        current_time = now()
        script_set.last_ping = current_time
        script_set.save()
        passed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PASSED
        )
        failed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.FAILED
        )
        pending_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING
        )
        script = factory.make_Script(timeout=timedelta(minutes=2))
        running_script_result = factory.make_ScriptResult(
            script_set=script_set,
            status=SCRIPT_STATUS.RUNNING,
            script=script,
            started=current_time - timedelta(minutes=50),
            parameters={"runtime": {"type": "runtime", "value": 60 * 60}},
        )

        mark_nodes_failed_after_missing_script_timeout(current_time, 20)
        node = reload_object(node)

        self.assertEqual(self.status, node.status)
        self.assertThat(self.mock_stop, MockNotCalled())
        self.assertEqual(
            SCRIPT_STATUS.PASSED, reload_object(passed_script_result).status
        )
        self.assertEqual(
            SCRIPT_STATUS.FAILED, reload_object(failed_script_result).status
        )
        self.assertEqual(
            SCRIPT_STATUS.PENDING, reload_object(pending_script_result).status
        )
        self.assertEqual(
            SCRIPT_STATUS.RUNNING, reload_object(running_script_result).status
        )
Exemple #11
0
    def test_mark_nodes_failed_after_script_overrun(self):
        """Fail a node whose running script has exceeded its timeout.

        The running script has a 60 second timeout but started 10 minutes
        ago. The test expects the node to move to ``self.failed_status``
        with the overrun message stored, the running result to become
        ``TIMEDOUT``, the pending result to become ``ABORTED`` and the
        already finished results to be left alone.
        """
        node, script_set = self.make_node()
        current_time = datetime.now()
        script_set.last_ping = current_time
        script_set.save()
        passed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PASSED)
        failed_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.FAILED)
        pending_script_result = factory.make_ScriptResult(
            script_set=script_set, status=SCRIPT_STATUS.PENDING)
        script = factory.make_Script(timeout=timedelta(seconds=60))
        running_script_result = factory.make_ScriptResult(
            script_set=script_set,
            status=SCRIPT_STATUS.RUNNING,
            script=script,
            started=current_time - timedelta(minutes=10))

        mark_nodes_failed_after_missing_script_timeout()
        node = reload_object(node)

        self.assertEqual(self.failed_status, node.status)
        self.assertEqual(
            "%s has run past it's timeout(%s)" %
            (running_script_result.name,
             str(running_script_result.script.timeout)),
            node.error_description)
        # The node is only expected to be stopped when SSH is disabled.
        if node.enable_ssh:
            self.assertThat(self.mock_stop, MockNotCalled())
        else:
            self.assertThat(self.mock_stop, MockCalledOnce())
        self.assertEqual(SCRIPT_STATUS.PASSED,
                         reload_object(passed_script_result).status)
        self.assertEqual(SCRIPT_STATUS.FAILED,
                         reload_object(failed_script_result).status)
        self.assertEqual(SCRIPT_STATUS.ABORTED,
                         reload_object(pending_script_result).status)
        self.assertEqual(SCRIPT_STATUS.TIMEDOUT,
                         reload_object(running_script_result).status)
Exemple #12
0
    def test_mark_nodes_failed_after_missing_timeout_heartbeat(self):
        """Fail a node whose script set last pinged 11 minutes ago.

        Expects the node to move to ``self.failed_status`` with the
        "missed the last 5 heartbeats" message stored, and every pending
        script result to end up ``TIMEDOUT``.
        """
        node, script_set = self.make_node()
        script_set.last_ping = datetime.now() - timedelta(minutes=11)
        script_set.save()
        script_results = [
            factory.make_ScriptResult(script_set=script_set,
                                      status=SCRIPT_STATUS.PENDING)
            for _ in range(3)
        ]

        mark_nodes_failed_after_missing_script_timeout()
        node = reload_object(node)

        self.assertEqual(self.failed_status, node.status)
        self.assertEqual('Node has missed the last 5 heartbeats',
                         node.error_description)
        # The node is only expected to be stopped when SSH is disabled.
        if node.enable_ssh:
            self.assertThat(self.mock_stop, MockNotCalled())
        else:
            self.assertThat(self.mock_stop, MockCalledOnce())
        for script_result in script_results:
            self.assertEqual(SCRIPT_STATUS.TIMEDOUT,
                             reload_object(script_result).status)