Ejemplo n.º 1
0
def create(cfg, metrics_store, node_ip, node_http_port, all_node_ips, all_node_ids, sources=False, distribution=False,
           external=False, docker=False):
    """
    Build a ``Mechanic`` from a supplier, one provisioner per configured node id and a launcher.

    The flags are evaluated in this order: ``sources`` / ``distribution`` first, then ``external``, then ``docker``.

    :param cfg: Config object; "provisioning/node.ids" and "provisioning/node.name.prefix" are read from it.
    :param metrics_store: Passed through to ``Mechanic``.
    :param node_ip: Passed through to every provisioner.
    :param node_http_port: Passed through to every provisioner.
    :param all_node_ips: Passed through to the local provisioners.
    :param all_node_ids: Ids that are combined with the node name prefix to form the names of all nodes.
    :param sources: Provision locally (together with ``distribution``, forwarded to ``supplier.create``).
    :param distribution: Provision locally (together with ``sources``, forwarded to ``supplier.create``).
    :param external: Not supported here; raises unless ``sources`` or ``distribution`` is set.
    :param docker: Provision via Docker.
    :return: A ``Mechanic`` instance.
    :raises exceptions.RallyAssertionError: If only ``external`` applies.
    :raises exceptions.SystemSetupError: If plugins are specified for a Docker cluster.
    :raises RuntimeError: If none of the flags is set.
    """
    race_root_path = paths.race_root(cfg)
    node_ids = cfg.opts("provisioning", "node.ids", mandatory=False)
    node_name_prefix = cfg.opts("provisioning", "node.name.prefix")
    car, plugins = load_team(cfg, external)

    if sources or distribution:
        binary_supplier = supplier.create(cfg, sources, distribution, car, plugins)
        all_node_names = ["%s-%s" % (node_name_prefix, n) for n in all_node_ids]
        provisioners = [
            provisioner.local(cfg, car, plugins, node_ip, node_http_port, all_node_ips,
                              all_node_names, race_root_path, "%s-%s" % (node_name_prefix, node_id))
            for node_id in node_ids
        ]
        return Mechanic(cfg, metrics_store, binary_supplier, provisioners, launcher.ProcessLauncher(cfg))

    if external:
        raise exceptions.RallyAssertionError("Externally provisioned clusters should not need to be managed by Rally's mechanic")

    if not docker:
        # It is a programmer error (and not a user error) if this function is called with wrong parameters
        raise RuntimeError("One of sources, distribution, docker or external must be True")

    if len(plugins) > 0:
        raise exceptions.SystemSetupError("You cannot specify any plugins for Docker clusters. Please remove "
                                          "\"--elasticsearch-plugins\" and try again.")
    # Docker clusters use a no-op supplier
    binary_supplier = lambda: None
    provisioners = [
        provisioner.docker(cfg, car, node_ip, node_http_port, race_root_path, "%s-%s" % (node_name_prefix, node_id))
        for node_id in node_ids
    ]
    return Mechanic(cfg, metrics_store, binary_supplier, provisioners, launcher.DockerLauncher(cfg))
Ejemplo n.º 2
0
    def test_daemon_start_stop(self, wait_for_pidfile, chdir, get_size,
                               supports, java_home, kill):
        """
        Start a single node with ``ProcessLauncher``, check its pid and verify that stopping it calls ``kill``.

        The extra parameters are presumably mocks injected by decorators that are not visible here.
        """
        cfg = config.Config()
        app_scope = config.Scope.application
        settings = (
            ("node", "root.dir", "test"),
            ("mechanic", "keep.running", False),
            ("mechanic", "telemetry.devices", []),
            ("mechanic", "telemetry.params", None),
            ("system", "env.name", "test"),
        )
        for section, key, value in settings:
            cfg.add(app_scope, section, key, value)

        metrics_store = get_metrics_store(cfg)
        races_root = paths.races_root(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg, metrics_store, races_root)

        default_car = Car("default", root_path=None, config_paths=[])
        node_config = NodeConfiguration(car=default_car,
                                        ip="127.0.0.1",
                                        node_name="testnode",
                                        node_root_path="/tmp",
                                        binary_path="/tmp",
                                        log_path="/tmp",
                                        data_paths="/tmp")

        started_nodes = proc_launcher.start([node_config])
        self.assertEqual(len(started_nodes), 1)
        self.assertEqual(started_nodes[0].pid, MOCK_PID_VALUE)

        proc_launcher.stop(started_nodes)
        self.assertTrue(kill.called)
Ejemplo n.º 3
0
    def test_env_options_order(self):
        """
        Verify the ``PATH`` and the exact ``ES_JAVA_OPTS`` string that ``_prepare_env`` produces
        when a flight recorder telemetry device is attached.
        """
        cfg = config.Config()
        app_scope = config.Scope.application
        cfg.add(app_scope, "mechanic", "keep.running", False)
        cfg.add(app_scope, "system", "env.name", "test")

        metrics_store = get_metrics_store(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg, metrics_store, races_root_dir="/home")
        default_car = team.Car(names="default-car", root_path=None, config_paths=["/tmp/rally-config"])

        flight_recorder = telemetry.FlightRecorder(telemetry_params={},
                                                   log_root="/tmp/telemetry",
                                                   java_major_version=8)
        node_telemetry = telemetry.Telemetry(["jfr"], devices=[flight_recorder])
        env = proc_launcher._prepare_env(car=default_car, node_name="node0", java_home="/java_home", t=node_telemetry)

        expected_path = "/java_home/bin" + os.pathsep + os.environ["PATH"]
        # the order of the individual options matters, hence we compare the complete string
        expected_java_opts = (
            "-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints "
            "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
            "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/default-car-node0.jfr "
            "-XX:StartFlightRecording=defaultrecording=true"
        )

        self.assertEqual(expected_path, env["PATH"])
        self.assertEqual(expected_java_opts, env["ES_JAVA_OPTS"])
Ejemplo n.º 4
0
    def test_daemon_start_stop(self, wait_for_pidfile, chdir, get_size,
                               supports, java_home):
        """
        Start two nodes with ``ProcessLauncher`` and verify that ``stop`` reports both of them as stopped.

        The extra parameters are presumably mocks injected by decorators that are not visible here.
        """
        cfg = config.Config()
        app_scope = config.Scope.application
        settings = (
            ("node", "root.dir", "test"),
            ("mechanic", "keep.running", False),
            ("telemetry", "devices", []),
            ("telemetry", "params", None),
            ("system", "env.name", "test"),
        )
        for section, key, value in settings:
            cfg.add(app_scope, section, key, value)

        metrics_store = get_metrics_store(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg)

        node_configs = [
            NodeConfiguration(build_type="tar",
                              car_env={},
                              car_runtime_jdks="12,11",
                              ip="127.0.0.1",
                              node_name="testnode-{}".format(node_number),
                              node_root_path="/tmp",
                              binary_path="/tmp",
                              data_paths="/tmp")
            for node_number in range(2)
        ]

        started_nodes = proc_launcher.start(node_configs)
        self.assertEqual(len(started_nodes), 2)
        self.assertEqual(started_nodes[0].pid, MOCK_PID_VALUE)

        stopped_nodes = proc_launcher.stop(started_nodes, metrics_store)
        # all nodes should be stopped
        self.assertEqual(started_nodes, stopped_nodes)
Ejemplo n.º 5
0
    def test_env_options_order(self, sleep):
        """
        Verify the ``PATH`` and the exact ``ES_JAVA_OPTS`` string that ``_prepare_env`` produces
        when a flight recorder telemetry device is attached.

        ``sleep`` is presumably a mock injected by a decorator that is not visible here.
        """
        cfg = config.Config()
        app_scope = config.Scope.application
        cfg.add(app_scope, "mechanic", "keep.running", False)
        cfg.add(app_scope, "system", "env.name", "test")

        proc_launcher = launcher.ProcessLauncher(cfg)

        flight_recorder = telemetry.FlightRecorder(telemetry_params={},
                                                   log_root="/tmp/telemetry",
                                                   java_major_version=8)
        node_telemetry = telemetry.Telemetry(["jfr"], devices=[flight_recorder])
        env = proc_launcher._prepare_env(car_env={}, node_name="node0", java_home="/java_home", t=node_telemetry)

        expected_path = "/java_home/bin" + os.pathsep + os.environ["PATH"]
        # the order of the individual options matters, hence we compare the complete string
        expected_java_opts = (
            "-XX:+ExitOnOutOfMemoryError -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints "
            "-XX:+UnlockCommercialFeatures -XX:+FlightRecorder "
            "-XX:FlightRecorderOptions=disk=true,maxage=0s,maxsize=0,dumponexit=true,dumponexitpath=/tmp/telemetry/profile.jfr "  # pylint: disable=line-too-long
            "-XX:StartFlightRecording=defaultrecording=true"
        )

        self.assertEqual(expected_path, env["PATH"])
        self.assertEqual(expected_java_opts, env["ES_JAVA_OPTS"])
Ejemplo n.º 6
0
    def test_daemon_start_stop(self, process, wait_for_pidfile, node_config, ms, cfg, chdir, supports, java_home, kill):
        """
        Start one node with ``ProcessLauncher``, check its pid and verify that stopping it calls ``kill``.

        All parameters are presumably mocks / fixtures injected by decorators that are not visible here.
        """
        races_root = paths.races_root(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg, ms, races_root)

        started_nodes = proc_launcher.start([node_config])
        self.assertEqual(len(started_nodes), 1)
        first_node = started_nodes[0]
        self.assertEqual(first_node.pid, MOCK_PID_VALUE)

        # explicitly disable keep_running before stopping the node
        proc_launcher.keep_running = False
        proc_launcher.stop(started_nodes)
        self.assertTrue(kill.called)
Ejemplo n.º 7
0
    def test_bundled_jdk_not_in_path(self):
        """Verify that ``_prepare_env`` leaves ``PATH`` untouched when no ``java_home`` is provided."""
        cfg = config.Config()
        app_scope = config.Scope.application
        cfg.add(app_scope, "mechanic", "keep.running", False)
        cfg.add(app_scope, "system", "env.name", "test")

        proc_launcher = launcher.ProcessLauncher(cfg)
        node_telemetry = telemetry.Telemetry()

        # no JAVA_HOME -> use the bundled JDK
        env = proc_launcher._prepare_env(car_env={},
                                         node_name="node0",
                                         java_home=None,
                                         t=node_telemetry)

        # PATH must be unmodified
        expected_path = os.environ["PATH"]
        self.assertEqual(expected_path, env["PATH"])
Ejemplo n.º 8
0
def create(cfg,
           metrics_store,
           all_node_ips,
           cluster_settings=None,
           sources=False,
           build=False,
           distribution=False,
           external=False,
           docker=False):
    """
    Build a ``Mechanic`` from a supplier, a list of provisioners and a launcher.

    The flags are evaluated in this order: ``sources`` / ``distribution`` first, then ``external``, then ``docker``.

    :param cfg: Config object; "provisioning/node.ids" is read from it.
    :param metrics_store: Passed through to the launcher.
    :param all_node_ips: Passed through to the local provisioners.
    :param cluster_settings: Passed through to the local and Docker provisioners.
    :param sources: Provision locally (together with ``distribution``, forwarded to ``supplier.create``).
    :param build: Forwarded to ``supplier.create``.
    :param distribution: Provision locally (together with ``sources``, forwarded to ``supplier.create``).
    :param external: Use a no-op provisioner and an ``ExternalLauncher``.
    :param docker: Provision via Docker.
    :return: A ``Mechanic`` instance.
    :raises exceptions.SystemSetupError: If plugins are specified for an external or a Docker cluster.
    :raises RuntimeError: If none of the flags is set.
    """
    races_root = paths.races_root(cfg)
    challenge_root_path = paths.race_root(cfg)
    node_ids = cfg.opts("provisioning", "node.ids", mandatory=False)
    car, plugins = load_team(cfg, external)

    if sources or distribution:
        binary_supplier = supplier.create(cfg, sources, distribution, build, challenge_root_path, car, plugins)
        provisioners = [
            provisioner.local_provisioner(cfg, car, plugins, cluster_settings, all_node_ips, challenge_root_path, node_id)
            for node_id in node_ids
        ]
        node_launcher = launcher.ProcessLauncher(cfg, metrics_store, races_root)
        return Mechanic(binary_supplier, provisioners, node_launcher)

    if external:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError(
                "You cannot specify any plugins for externally provisioned clusters. Please remove "
                "\"--elasticsearch-plugins\" and try again.")

        # nothing to supply or to provision for external clusters
        binary_supplier = lambda: None
        provisioners = [provisioner.no_op_provisioner()]
        node_launcher = launcher.ExternalLauncher(cfg, metrics_store)
        return Mechanic(binary_supplier, provisioners, node_launcher)

    if docker:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError(
                "You cannot specify any plugins for Docker clusters. Please remove "
                "\"--elasticsearch-plugins\" and try again.")
        binary_supplier = lambda: None
        provisioners = [
            provisioner.docker_provisioner(cfg, car, cluster_settings, challenge_root_path, node_id)
            for node_id in node_ids
        ]
        node_launcher = launcher.DockerLauncher(cfg, metrics_store)
        return Mechanic(binary_supplier, provisioners, node_launcher)

    # It is a programmer error (and not a user error) if this function is called with wrong parameters
    raise RuntimeError(
        "One of sources, distribution, docker or external must be True")
Ejemplo n.º 9
0
    def test_pass_java_opts(self):
        """
        Verify that ``ES_JAVA_OPTS`` is handed to the node environment unmodified when it is listed in "passenv".

        The environment variable is only set for the duration of the test so that it cannot leak into
        other tests running in the same process.
        """
        from unittest import mock

        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "test")
        cfg.add(config.Scope.application, "system", "passenv", "ES_JAVA_OPTS")

        # patch.dict restores os.environ to its previous state when the block exits (even on failure)
        with mock.patch.dict(os.environ, {"ES_JAVA_OPTS": "-XX:-someJunk"}):
            proc_launcher = launcher.ProcessLauncher(cfg)

            t = telemetry.Telemetry()
            # no JAVA_HOME -> use the bundled JDK
            env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=t)

            # unmodified
            assert env["ES_JAVA_OPTS"] == os.environ["ES_JAVA_OPTS"]
Ejemplo n.º 10
0
    def test_bundled_jdk_not_in_path(self):
        """
        Verify that without an explicit ``java_home`` the node environment keeps ``PATH`` unmodified and does
        not contain ``JAVA_HOME`` even though it is set in the process environment.

        ``JAVA_HOME`` is only set for the duration of the test so that it cannot leak into other tests
        running in the same process.
        """
        from unittest import mock

        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "test")

        # patch.dict restores os.environ to its previous state when the block exits (even on failure)
        with mock.patch.dict(os.environ, {"JAVA_HOME": "/path/to/java"}):
            proc_launcher = launcher.ProcessLauncher(cfg)

            t = telemetry.Telemetry()
            # no java_home passed -> use the bundled JDK
            env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=t)

            # unmodified
            assert env["PATH"] == os.environ["PATH"]
            assert env.get("JAVA_HOME") is None
Ejemplo n.º 11
0
def stop(cfg):
    """
    Stop the nodes recorded in the node file of this installation, store system results and clean up.

    :param cfg: Config object; "mechanic/preserve.install" is read from it.
    :raises exceptions.SystemSetupError: If the build type of the node configuration is neither "tar" nor "docker".
    """
    root_path = paths.install_root(cfg)
    node_config = provisioner.load_node_configuration(root_path)
    build_type = node_config.build_type
    if build_type == "tar":
        node_launcher = launcher.ProcessLauncher(cfg)
    elif build_type == "docker":
        node_launcher = launcher.DockerLauncher(cfg)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(build_type))

    nodes, race_id = _load_node_file(root_path)

    metrics_store_type = metrics.metrics_store_class(cfg)
    metrics_store = metrics_store_type(cfg)

    race_store = metrics.race_store(cfg)
    try:
        current_race = race_store.find_by_race_id(race_id)
    except exceptions.NotFound:
        logger = logging.getLogger(__name__)
        logger.info(
            "Could not find race [%s] most likely because an in-memory metrics store is "
            "used across multiple machines. Use an Elasticsearch metrics store to persist "
            "results.", race_id)
        # We are assuming here that we use an Elasticsearch metrics store. With a file race store (across
        # multiple machines) we cannot retrieve a race. In that case we open our in-memory metrics store with
        # settings derived from startup parameters (because we can't store system metrics persistently anyway).
        current_race = metrics.create_race(cfg, track=None, challenge=None)

    metrics_store.open(race_id=current_race.race_id,
                       race_timestamp=current_race.race_timestamp,
                       track_name=current_race.track_name,
                       challenge_name=current_race.challenge_name)

    node_launcher.stop(nodes, metrics_store)
    _delete_node_file(root_path)

    metrics_store.flush(refresh=True)
    for stopped_node in nodes:
        system_results = metrics.calculate_system_results(metrics_store, stopped_node.node_name)
        current_race.add_results(system_results)
        # the race is stored after each node's results have been added
        metrics.results_store(cfg).store_results(current_race)

    metrics_store.close()

    # TODO: Do we need to expose this as a separate command as well?
    preserve_install = cfg.opts("mechanic", "preserve.install")
    provisioner.cleanup(preserve=preserve_install,
                        install_dir=node_config.binary_path,
                        data_paths=node_config.data_paths)
Ejemplo n.º 12
0
    def test_daemon_stop_with_already_terminated_process(self):
        """Verify that ``stop`` reports no stopped nodes for a node whose pid is -1."""
        cfg = config.Config()
        app_scope = config.Scope.application
        settings = (
            ("node", "root.dir", "test"),
            ("telemetry", "devices", []),
            ("telemetry", "params", None),
            ("system", "env.name", "test"),
        )
        for section, key, value in settings:
            cfg.add(app_scope, section, key, value)

        metrics_store = get_metrics_store(cfg)
        proc_launcher = launcher.ProcessLauncher(cfg)

        terminated_node = cluster.Node(pid=-1,
                                       binary_path="/bin",
                                       host_name="localhost",
                                       node_name="rally-0",
                                       telemetry=telemetry.Telemetry())

        stopped_nodes = proc_launcher.stop([terminated_node], metrics_store)
        # no nodes should have been stopped (they were already stopped)
        assert stopped_nodes == []
Ejemplo n.º 13
0
def stop(cfg):
    """
    Stop the nodes recorded in the node file of this installation, persist system results if the
    corresponding race can be found and clean up afterwards.

    :param cfg: Config object; "mechanic/preserve.install" is read from it.
    :raises exceptions.SystemSetupError: If the build type of the node configuration is neither "tar" nor "docker".
    """
    root_path = paths.install_root(cfg)
    node_config = provisioner.load_node_configuration(root_path)
    # choose the launcher that matches the build type of the installation
    if node_config.build_type == "tar":
        node_launcher = launcher.ProcessLauncher(cfg)
    elif node_config.build_type == "docker":
        node_launcher = launcher.DockerLauncher(cfg)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(
            node_config.build_type))

    nodes, race_id = _load_node_file(root_path)

    cls = metrics.metrics_store_class(cfg)
    metrics_store = cls(cfg)

    race_store = metrics.race_store(cfg)
    try:
        # both the race lookup and opening the metrics store are covered by the NotFound handler below
        current_race = race_store.find_by_race_id(race_id)
        metrics_store.open(race_id=current_race.race_id,
                           race_timestamp=current_race.race_timestamp,
                           track_name=current_race.track_name,
                           challenge_name=current_race.challenge_name)
    except exceptions.NotFound:
        logging.getLogger(__name__).info(
            "Could not find race [%s] and will thus not persist system metrics.",
            race_id)
        # Don't persist system metrics if we can't retrieve the race as we cannot derive the required meta-data.
        current_race = None
        metrics_store = None

    # metrics_store is None here if the race could not be found
    node_launcher.stop(nodes, metrics_store)
    _delete_node_file(root_path)

    if current_race:
        metrics_store.flush(refresh=True)
        for node in nodes:
            results = metrics.calculate_system_results(metrics_store,
                                                       node.node_name)
            current_race.add_results(results)
            # the race is stored after each node's results have been added
            metrics.results_store(cfg).store_results(current_race)

        metrics_store.close()

    # TODO: Do we need to expose this as a separate command as well?
    provisioner.cleanup(preserve=cfg.opts("mechanic", "preserve.install"),
                        install_dir=node_config.binary_path,
                        data_paths=node_config.data_paths)
Ejemplo n.º 14
0
    def test_pass_env_vars(self):
        """
        Verify that the environment variables listed in "passenv" are handed to the node environment
        unmodified and that ``ES_JAVA_OPTS`` has the expected value.

        The environment variables are only set for the duration of the test so that they cannot leak into
        other tests running in the same process.
        """
        from unittest import mock

        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "test")
        cfg.add(config.Scope.application, "system", "passenv", "JAVA_HOME,FOO1")

        # patch.dict restores os.environ to its previous state when the block exits (even on failure)
        with mock.patch.dict(os.environ, {"JAVA_HOME": "/path/to/java", "FOO1": "BAR1"}):
            proc_launcher = launcher.ProcessLauncher(cfg)

            t = telemetry.Telemetry()
            # no java_home passed -> use the bundled JDK
            env = proc_launcher._prepare_env(node_name="node0", java_home=None, t=t)

            # unmodified
            assert env["JAVA_HOME"] == os.environ["JAVA_HOME"]
            assert env["FOO1"] == os.environ["FOO1"]
            assert env["ES_JAVA_OPTS"] == "-XX:+ExitOnOutOfMemoryError"
Ejemplo n.º 15
0
def start(cfg):
    """
    Launch the node of this installation and record it together with the race id in the node file.

    :param cfg: Config object; "system/race.id" (and "system/install.id" for the error message) are read from it.
    :raises exceptions.SystemSetupError: If a node file already exists for this installation or if the build type
                                         of the node configuration is neither "tar" nor "docker".
    """
    root_path = paths.install_root(cfg)
    race_id = cfg.opts("system", "race.id")
    # avoid double-launching - we expect that the node file is absent
    with contextlib.suppress(FileNotFoundError):
        _load_node_file(root_path)
        # only reached if the node file could be loaded, i.e. a node has been started already
        install_id = cfg.opts("system", "install.id")
        message = ("A node with this installation id is already running. Please stop it first "
                   "with {} stop --installation-id={}".format(PROGRAM_NAME, install_id))
        raise exceptions.SystemSetupError(message)

    node_config = provisioner.load_node_configuration(root_path)
    build_type = node_config.build_type

    if build_type == "tar":
        node_launcher = launcher.ProcessLauncher(cfg)
    elif build_type == "docker":
        node_launcher = launcher.DockerLauncher(cfg)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(build_type))

    started_nodes = node_launcher.start([node_config])
    _store_node_file(root_path, (started_nodes, race_id))