Esempio n. 1
0
    def _report_unschedulable(self, unschedulable, err_msg=None):
        """Record a FAIL result for every test that cannot be scheduled.

        Logs one summary line, then for each test logs a message, appends a
        ``TestResult`` with status ``FAIL`` to ``self.results``, reports it and
        advances ``self.test_counter``.

        :param unschedulable: test contexts that cannot be run; nothing is
            logged or recorded when this is empty or None.
        :param err_msg: optional message used verbatim for every test. When
            falsy, a per-test message is built from the test id and the output
            of ``attempt_remove_spec`` on the nodes of the whole cluster.
        """
        if not unschedulable:
            return

        total = len(unschedulable)
        self._log(logging.ERROR,
                  f"There are {total} tests which cannot be run due to insufficient cluster resources")

        for test_context in unschedulable:
            # A caller-supplied message wins; the default is only computed when it is needed
            failure_msg = err_msg or (
                f"Test {test_context.test_id} requires more resources than are available in the whole cluster. "
                f"{self.cluster.all().nodes.attempt_remove_spec(test_context.expected_cluster_spec)}"
            )
            self._log(logging.ERROR, failure_msg)

            failed = TestResult(test_context, self.test_counter, self.session_context,
                                test_status=FAIL, summary=failure_msg,
                                start_time=time.time(), stop_time=time.time())
            self.results.append(failed)
            failed.report()

            self.test_counter += 1
Esempio n. 2
0
    def run_all_tests(self):
        """Run every schedulable test and return the collected results.

        Tests listed in ``self.scheduler.unschedulable`` are recorded as FAIL
        without being run. After that, tests are triggered while
        ``self._ready_to_trigger_more_tests`` holds and client messages are
        handled while ``self._expect_client_requests`` holds.

        :return: ``self.results``
        :raises Exception: an error raised while receiving or handling a
            client message is logged and then re-raised.
        """
        self.receiver.start()
        self.results.start_time = time.time()

        # Fail up front every test the scheduler flagged as unschedulable
        if len(self.scheduler.unschedulable) > 0:
            num_unschedulable = len(self.scheduler.unschedulable)
            self._log(logging.ERROR,
                      "There are %d tests which cannot be run due to insufficient cluster resources"
                      % num_unschedulable)

            for test_context in self.scheduler.unschedulable:
                failure_msg = "".join([
                    "Test %s expects more nodes than are available in the entire cluster: " % test_context.test_id,
                    "expected_num_nodes: %s, " % str(test_context.expected_node_spec),
                    "cluster size: %s." % str(self.cluster.node_spec),
                ])
                self._log(logging.ERROR, failure_msg)

                failed = TestResult(test_context, self.test_counter, self.session_context,
                                    test_status=FAIL, summary=failure_msg,
                                    start_time=time.time(), stop_time=time.time())
                self.results.append(failed)
                failed.report()

                self.test_counter += 1

        # Main loop: trigger tests and service client messages
        self._log(logging.INFO,
                  "starting test run with session id %s..." % self.session_context.session_id)
        self._log(logging.INFO, "running %d tests..." % len(self.scheduler))

        while self._ready_to_trigger_more_tests or self._expect_client_requests:
            try:
                while self._ready_to_trigger_more_tests:
                    upcoming = self.scheduler.next()
                    self._preallocate_subcluster(upcoming)
                    self._run_single_test(upcoming)

                if not self._expect_client_requests:
                    continue

                try:
                    self._handle(self.receiver.recv())
                except Exception as exc:
                    details = "Exception receiving message: %s: %s" % (str(type(exc)), str(exc))
                    self._log(logging.ERROR, details + "\n" + traceback.format_exc(limit=16))

                    # All processes are on the same machine, so treat communication failure as a fatal error
                    raise
            except KeyboardInterrupt:
                # On SIGINT, stop triggering new tests and let the currently running tests finish
                self._log(logging.INFO,
                          "Received KeyboardInterrupt. Now waiting for currently running tests to finish...")
                self.stop_testing = True

        # Wait for every client process before shutting the receiver down
        for client_proc in self._client_procs.values():
            client_proc.join()
        self.receiver.close()

        return self.results
Esempio n. 3
0
    def run(self):
        """Load and run a single test, then send its result to the server.

        An ignored test is reported with status ``IGNORE`` and is not run.
        Otherwise the test class is instantiated, the cluster is checked
        against the test's minimum cluster spec, and setup, the test body and
        teardown are executed. Any exception raised in that sequence
        (``BaseException`` is caught, so this includes ``KeyboardInterrupt``)
        turns the result into ``FAIL`` with the traceback in the summary.
        """
        self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
        self.test_context = self._collect_test_context(**self.test_metadata)
        self.test_context.test_index = self.test_index

        self.send(self.message.running())
        if self.test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result = TestResult(self.test_context,
                                self.test_index,
                                self.session_context,
                                test_status=IGNORE,
                                start_time=time.time(),
                                stop_time=time.time())
            result.report()
            # Tell the server we are finished
            self.send(self.message.finished(result=result))
            return

        # Results from this test, as well as logs will be dumped here
        mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

        # -1 marks "never started" / "never stopped" in the reported result
        start_time = -1
        stop_time = -1
        test_status = PASS
        summary = ""
        data = None

        try:
            # Instantiate test
            self.test = self.test_context.cls(self.test_context)

            self.log(logging.DEBUG, "Checking if there are enough nodes...")
            min_cluster_spec = self.test.min_cluster_spec()

            # A non-empty message from attempt_remove_spec is treated as "spec cannot be satisfied"
            msg = self.cluster.all().nodes.attempt_remove_spec(min_cluster_spec)
            if len(msg) > 0:
                raise RuntimeError("There are not enough nodes available in the cluster to run this test. " + msg)

            # Run the test unit
            start_time = time.time()
            self.setup_test()

            data = self.run_test()

            test_status = PASS
            self.log(logging.INFO, "PASS")

        except BaseException as e:
            err_trace = str(e) + "\n" + traceback.format_exc(limit=16)
            self.log(logging.INFO, "FAIL: " + err_trace)

            test_status = FAIL
            summary += err_trace

        finally:
            self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)

            stop_time = time.time()

            # The services are read from the test context, so that is the object to probe.
            # Probing ``self`` guarded a different attribute than the one dereferenced below.
            if hasattr(self.test_context, "services"):
                service_errors = self.test_context.services.errors()
                if service_errors:
                    summary += "\n\n" + service_errors

            result = TestResult(
                self.test_context,
                self.test_index,
                self.session_context,
                test_status,
                summary,
                data,
                start_time,
                stop_time)

            self.log(logging.INFO, "Summary: %s" % str(result.summary))
            self.log(logging.INFO, "Data: %s" % str(result.data))

            result.report()

        # Tell the server we are finished
        self._do_safely(lambda: self.send(self.message.finished(result=result)), "Problem sending FINISHED message:")

        # Release test_context resources only after creating the result and finishing logging activity
        # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
        self.test_context.close()
        self.test_context = None
        self.test = None
Esempio n. 4
0
    def run(self):
        """Load and run a single test, then send its result to the server.

        An ignored test is reported with status ``IGNORE`` and is not run.
        Otherwise the test class is instantiated, the cluster is checked
        against the per-operating-system node counts returned by
        ``min_cluster_size()``, and setup, the test body and teardown are
        executed. Any exception raised in that sequence (``BaseException`` is
        caught, so this includes ``KeyboardInterrupt``) turns the result into
        ``FAIL`` with the traceback in the summary.
        """
        self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
        self.test_context = self._collect_test_context(**self.test_metadata)
        self.test_context.test_index = self.test_index

        self.send(self.message.running())
        if self.test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result = TestResult(self.test_context,
                                self.test_index,
                                self.session_context,
                                test_status=IGNORE,
                                start_time=time.time(),
                                stop_time=time.time())
            result.report()
            # Tell the server we are finished
            self.send(self.message.finished(result=result))
            return

        # Results from this test, as well as logs will be dumped here
        mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

        # -1 marks "never started" / "never stopped" in the reported result
        start_time = -1
        stop_time = -1
        test_status = PASS
        summary = ""
        data = None

        try:
            # Instantiate test
            self.test = self.test_context.cls(self.test_context)

            self.log(logging.DEBUG, "Checking if there are enough nodes...")
            # items() works on both Python 2 and 3; iteritems() exists only on Python 2
            for operating_system, node_count in self.test.min_cluster_size().items():
                num_avail = self.cluster.num_nodes_for_operating_system(operating_system)
                if node_count > num_avail:
                    raise RuntimeError(
                        "There are not enough nodes available in the cluster to run this test. "
                        "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s"
                        % (operating_system, num_avail, node_count, self.test_context.services))

            # Run the test unit
            start_time = time.time()
            self.setup_test()

            data = self.run_test()

            test_status = PASS
            self.log(logging.INFO, "PASS")

        except BaseException as e:
            # str(e) instead of e.message: the attribute is gone on Python 3, and looking it up
            # here would raise before test_status is switched to FAIL
            err_trace = str(e) + "\n" + traceback.format_exc(limit=16)
            self.log(logging.INFO, "FAIL: " + err_trace)

            test_status = FAIL
            summary += err_trace

        finally:
            self.teardown_test(
                teardown_services=not self.session_context.no_teardown,
                test_status=test_status)

            stop_time = time.time()

            result = TestResult(self.test_context, self.test_index,
                                self.session_context, test_status, summary,
                                data, start_time, stop_time)

            self.log(logging.INFO, "Summary: %s" % str(result.summary))
            self.log(logging.INFO, "Data: %s" % str(result.data))

            result.report()

        # Tell the server we are finished
        self._do_safely(
            lambda: self.send(self.message.finished(result=result)),
            "Problem sending FINISHED message:")

        # Release test_context resources only after creating the result and finishing logging activity
        # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
        self.test_context.close()
        self.test_context = None
        self.test = None
Esempio n. 5
0
    def run_all_tests(self):
        """Run every schedulable test and return the collected results.

        Tests listed in ``self.scheduler.unschedulable`` are recorded as FAIL
        without being run; their summary includes the output of
        ``attempt_remove_spec`` on the nodes of the whole cluster. After that,
        tests are triggered while ``self._ready_to_trigger_more_tests`` holds
        and client messages are handled while ``self._expect_client_requests``
        holds.

        :return: ``self.results``
        :raises Exception: an error raised while receiving or handling a
            client message is logged and then re-raised.
        """
        self.receiver.start()
        self.results.start_time = time.time()

        # Fail up front every test the scheduler flagged as unschedulable
        if len(self.scheduler.unschedulable) > 0:
            num_unschedulable = len(self.scheduler.unschedulable)
            self._log(logging.ERROR,
                      "There are %d tests which cannot be run due to insufficient cluster resources"
                      % num_unschedulable)

            for test_context in self.scheduler.unschedulable:
                failure_msg = (
                    "Test %s requires more resources than are available in the whole cluster. "
                    % test_context.test_id
                ) + self.cluster.all().nodes.attempt_remove_spec(test_context.expected_cluster_spec)
                self._log(logging.ERROR, failure_msg)

                failed = TestResult(test_context, self.test_counter, self.session_context,
                                    test_status=FAIL, summary=failure_msg,
                                    start_time=time.time(), stop_time=time.time())
                self.results.append(failed)
                failed.report()

                self.test_counter += 1

        # Main loop: trigger tests and service client messages
        session_id = self.session_context.session_id
        self._log(logging.INFO, "starting test run with session id %s..." % session_id)
        self._log(logging.INFO, "running %d tests..." % len(self.scheduler))

        while self._ready_to_trigger_more_tests or self._expect_client_requests:
            try:
                while self._ready_to_trigger_more_tests:
                    upcoming = self.scheduler.next()
                    self._preallocate_subcluster(upcoming)
                    self._run_single_test(upcoming)

                if not self._expect_client_requests:
                    continue

                try:
                    self._handle(self.receiver.recv())
                except Exception as exc:
                    details = "Exception receiving message: %s: %s" % (str(type(exc)), str(exc))
                    self._log(logging.ERROR, details + "\n" + traceback.format_exc(limit=16))

                    # All processes are on the same machine, so treat communication failure as a fatal error
                    raise
            except KeyboardInterrupt:
                # On SIGINT, stop triggering new tests and let the currently running tests finish
                self._log(logging.INFO,
                          "Received KeyboardInterrupt. Now waiting for currently running tests to finish...")
                self.stop_testing = True

        # Wait for every client process before shutting the receiver down
        for client_proc in self._client_procs.values():
            client_proc.join()
        self.receiver.close()

        return self.results
Esempio n. 6
0
    def run(self):
        """Load and run a single test, then send its result to the server.

        An ignored test is reported with status ``IGNORE`` and is not run.
        Otherwise the test class is instantiated, the number of node specs
        required per operating system is compared with the nodes the cluster
        offers for that operating system, and setup, the test body and
        teardown are executed. Any exception raised in that sequence
        (``BaseException`` is caught, so this includes ``KeyboardInterrupt``)
        turns the result into ``FAIL`` with the traceback in the summary.
        """
        self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
        self.test_context = self._collect_test_context(**self.test_metadata)
        self.test_context.test_index = self.test_index

        self.send(self.message.running())
        if self.test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result = TestResult(self.test_context,
                                self.test_index,
                                self.session_context,
                                test_status=IGNORE,
                                start_time=time.time(),
                                stop_time=time.time())
            result.report()
            # Tell the server we are finished
            self.send(self.message.finished(result=result))
            return

        # Results from this test, as well as logs will be dumped here
        mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

        # -1 marks "never started" / "never stopped" in the reported result
        start_time = -1
        stop_time = -1
        test_status = PASS
        summary = ""
        data = None

        try:
            # Instantiate test
            self.test = self.test_context.cls(self.test_context)

            self.log(logging.DEBUG, "Checking if there are enough nodes...")
            min_cluster_spec = self.test.min_cluster_spec()

            # Count how many node specs are requested for each operating system
            os_to_num_nodes = {}
            for node_spec in min_cluster_spec:
                operating_system = node_spec.operating_system
                os_to_num_nodes[operating_system] = os_to_num_nodes.get(operating_system, 0) + 1

            # items() works on both Python 2 and 3; iteritems() exists only on Python 2
            for operating_system, node_count in os_to_num_nodes.items():
                num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
                if node_count > num_avail:
                    raise RuntimeError(
                        "There are not enough nodes available in the cluster to run this test. "
                        "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
                        (operating_system, num_avail, node_count, self.test_context.services))

            # Run the test unit
            start_time = time.time()
            self.setup_test()

            data = self.run_test()

            test_status = PASS
            self.log(logging.INFO, "PASS")

        except BaseException as e:
            # str(e) instead of e.message: the attribute is gone on Python 3, and looking it up
            # here would raise before test_status is switched to FAIL
            err_trace = str(e) + "\n" + traceback.format_exc(limit=16)
            self.log(logging.INFO, "FAIL: " + err_trace)

            test_status = FAIL
            summary += err_trace

        finally:
            self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)

            stop_time = time.time()

            # Append whatever the services report as errors to the test summary
            service_errors = self.test_context.services.errors()
            if service_errors:
                summary += "\n\n" + service_errors

            result = TestResult(
                self.test_context,
                self.test_index,
                self.session_context,
                test_status,
                summary,
                data,
                start_time,
                stop_time)

            self.log(logging.INFO, "Summary: %s" % str(result.summary))
            self.log(logging.INFO, "Data: %s" % str(result.data))

            result.report()

        # Tell the server we are finished
        self._do_safely(lambda: self.send(self.message.finished(result=result)), "Problem sending FINISHED message:")

        # Release test_context resources only after creating the result and finishing logging activity
        # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
        self.test_context.close()
        self.test_context = None
        self.test = None
Esempio n. 7
0
    def run(self):
        """Load a single test, run it up to ``self.deflake_num`` times and report the result.

        An ignored test is reported with status ``IGNORE`` and is not run.
        Otherwise ``_do_run`` is called repeatedly while the status is ``FAIL``
        and fewer than ``self.deflake_num`` runs have been made. A ``PASS``
        obtained after more than one run is reported as ``FLAKY``. The result
        is reported and sent to the server from the ``finally`` clause, so that
        happens even when a run raises.
        """
        self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
        self.test_context = self._collect_test_context(**self.test_metadata)
        self.test_context.test_index = self.test_index

        self.send(self.message.running())
        if self.test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result = TestResult(self.test_context,
                                self.test_index,
                                self.session_context,
                                test_status=IGNORE,
                                start_time=time.time(),
                                stop_time=time.time())
            result.report()
            # Tell the server we are finished
            self.send(self.message.finished(result=result))
            return

        # Defaults used for the reported result if no run completes
        start_time = -1
        stop_time = -1
        test_status = FAIL
        # NOTE(review): summary starts as a list, but the value returned by _do_run is used
        # with .split('\n') below, so it is presumably a str there - confirm against _do_run
        summary = []
        data = None
        self.all_services = ServiceRegistry()

        num_runs = 0

        try:
            # Retry only while the test keeps failing and the deflake budget is not used up
            while test_status == FAIL and num_runs < self.deflake_num:
                num_runs += 1
                self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
                start_time = time.time()
                test_status, summary, data = self._do_run(num_runs)

                # A pass that needed more than one attempt is reported as flaky
                if test_status == PASS and num_runs > 1:
                    test_status = FLAKY

                msg = str(test_status.to_json())
                if summary:
                    msg += ": {}".format(summary)
                if num_runs != self.deflake_num:
                    # Separator line as wide as the longest summary line
                    msg += "\n" + "~" * max(len(line) for line in summary.split('\n'))

                self.log(logging.INFO, msg)

        finally:
            stop_time = time.time()

            # The utilization check returns the status and summary that are reported below
            test_status, summary = self._check_cluster_utilization(test_status, summary)

            if num_runs > 1:
                # for reporting purposes report all services
                self.test_context.services = self.all_services
            # for flaky tests, we report the start and end time of the successful run, and not the whole run period
            result = TestResult(
                self.test_context,
                self.test_index,
                self.session_context,
                test_status,
                summary,
                data,
                start_time,
                stop_time)

            self.log(logging.INFO, "Data: %s" % str(result.data))

            result.report()
            # Tell the server we are finished
            self._do_safely(lambda: self.send(self.message.finished(result=result)),
                            "Problem sending FINISHED message for " + str(self.test_metadata) + ":\n")
            # Release test_context resources only after creating the result and finishing logging activity
            # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
            self.test_context.close()
            # Drop references held by this client once the context is closed
            self.all_services = None
            self.test_context = None
            self.test = None