Esempio n. 1
0
    def _setup_zk(self):
        """
        Pull the ZooKeeper image, start its container and wait for it
        to become ready.

        Readiness is polled through ``utils.is_zk_ready`` on the locally
        bound port, at most ``utils.max_retry_attempts`` times with
        ``utils.sleep_time_secs`` between polls.

        Raises:
            Exception: if ZooKeeper is not ready after the last poll.
        """
        cfg = self.config

        zk_image = cfg["zk_image"]
        self.cli.pull(zk_image)

        zk_name = cfg["zk_container"]
        host_cfg = self.cli.create_host_config(
            port_bindings={cfg["default_zk_port"]: cfg["local_zk_port"]},
        )
        zk = self.cli.create_container(
            name=zk_name,
            hostname=zk_name,
            host_config=host_cfg,
            image=zk_image,
            detach=True,
        )
        self.cli.start(container=zk.get("Id"))
        print_utils.okgreen("started container %s" % cfg["zk_container"])
        print_utils.okgreen("waiting on %s to be rdy" % cfg["zk_container"])

        # Poll until ready; falling out of the loop means every attempt
        # failed.
        for _ in range(utils.max_retry_attempts):
            if utils.is_zk_ready(cfg["local_zk_port"]):
                return
            time.sleep(utils.sleep_time_secs)

        raise Exception("zk failed to come up in time")
Esempio n. 2
0
def create_cassandra_store(config):
    """
    Run the keyspace setup script inside the cassandra container and
    verify the result, retrying up to ``utils.max_retry_attempts`` times.

    Each attempt sleeps ``utils.sleep_time_secs`` first, executes
    ``/files/setup_cassandra.sh`` in the container and, when that produces
    no output, runs ``cqlsh -e "describe <db>"`` to confirm the keyspace
    exists.

    Args:
        config: mapping providing ``cassandra_container`` and
            ``cassandra_test_db``.

    Exits the process with status 1 when all attempts fail.
    """

    def as_text(output):
        # exec_start hands back bytes under Python 3; normalise so the
        # string comparisons below work on either interpreter.
        if isinstance(output, bytes):
            return output.decode("utf-8", "replace")
        return output

    retry_attempts = 0
    while retry_attempts < utils.max_retry_attempts:
        time.sleep(utils.sleep_time_secs)
        setup_exe = cli.exec_create(
            container=config["cassandra_container"],
            cmd="/files/setup_cassandra.sh",
        )
        show_exe = cli.exec_create(
            container=config["cassandra_container"],
            cmd='cqlsh -e "describe %s"' % config["cassandra_test_db"],
        )
        # by api design, exec_start needs to be called after exec_create
        # to run 'docker exec'
        resp = as_text(cli.exec_start(exec_id=setup_exe))
        # Compare by value: `is ""` tests object identity and is never
        # true for a bytes result.
        if resp == "":
            resp = as_text(cli.exec_start(exec_id=show_exe))
            # NOTE(review): the keyspace name is hard-coded here while the
            # describe command uses config["cassandra_test_db"] - confirm
            # the two are always the same.
            if "CREATE KEYSPACE peloton_test WITH" in resp:
                print_utils.okgreen("cassandra store is created")
                return
        print_utils.warn("failed to create cassandra store, retrying...")
        retry_attempts += 1

    print_utils.fail("Failed to create cassandra store after %d attempts, "
                     "aborting..." % utils.max_retry_attempts)
    sys.exit(1)
Esempio n. 3
0
    def _setup_cassandra(self):
        """
        Replace any existing cassandra container with a fresh one and
        create the cassandra store.

        The container publishes the CQL and thrift ports on their
        configured local ports, mounts ``<work_dir>/files`` at ``/files``
        and is started through ``run_cassandra_with_stratio_index.sh``.
        """
        cfg = self.config
        name = cfg["cassandra_container"]

        self.cli.remove_existing_container(name)
        self.cli.pull(cfg["cassandra_image"])

        # container port -> host port
        published = {
            cfg["cassandra_cql_port"]: cfg["local_cassandra_cql_port"],
            cfg["cassandra_thrift_port"]: cfg["local_cassandra_thrift_port"],
        }
        host_cfg = self.cli.create_host_config(
            port_bindings=published,
            binds=[work_dir + "/files:/files"],
        )
        cassandra = self.cli.create_container(
            name=name,
            hostname=name,
            host_config=host_cfg,
            environment=["MAX_HEAP_SIZE=1G", "HEAP_NEWSIZE=256M"],
            image=cfg["cassandra_image"],
            detach=True,
            entrypoint="bash /files/run_cassandra_with_stratio_index.sh",
        )
        self.cli.start(container=cassandra.get("Id"))
        print_utils.okgreen("started container %s" % name)

        self._create_cassandra_store()
Esempio n. 4
0
def run_mesos_agent(
    config,
    agent_index,
    port_offset,
    is_exclusive=False,
    exclusive_label_value="",
):
    """
    Create and start a single mesos agent container.

    Args:
        config: mapping with the minicluster settings read below
            (container name prefix, ports, mesos flags, image name).
        agent_index: index appended to the container name.
        port_offset: added to ``config["local_agent_port"]`` to obtain
            the host port, which is also exported as ``MESOS_PORT``.
        is_exclusive: when true the container name gets an
            ``-exclusive`` suffix and a ``peloton/exclusive`` attribute
            is appended to the agent attributes.
        exclusive_label_value: value of the ``peloton/exclusive``
            attribute; only used when ``is_exclusive`` is true.
    """
    prefix = config["mesos_agent_container"]
    attributes = config["attributes"]
    if is_exclusive:
        prefix += "-exclusive"
        attributes += ";peloton/exclusive:" + exclusive_label_value
    agent = prefix + repr(agent_index)
    port = config["local_agent_port"] + port_offset
    container = cli.create_container(
        name=agent,
        hostname=agent,
        volumes=["/files", "/var/run/docker.sock"],
        ports=[repr(config["default_agent_port"])],
        host_config=cli.create_host_config(
            port_bindings={config["default_agent_port"]: port},
            binds=[
                work_dir + "/files:/files",
                work_dir + "/mesos_config/etc_mesos-slave:/etc/mesos-slave",
                # the host docker socket is shared with the agent
                "/var/run/docker.sock:/var/run/docker.sock",
            ],
            privileged=True,
        ),
        environment=[
            "MESOS_PORT=" + repr(port),
            "MESOS_MASTER=zk://{0}:{1}/mesos".format(
                utils.get_container_ip(config["zk_container"]),
                config["default_zk_port"],
            ),
            # NOTE(review): this entry was mangled into invalid syntax in
            # the source; reconstructed with repr() to match the other
            # non-string settings - confirm against the upstream file.
            "MESOS_SWITCH_USER=" + repr(config["switch_user"]),
            "MESOS_CONTAINERIZERS=" + config["containers"],
            "MESOS_LOG_DIR=" + config["log_dir"],
            "MESOS_ISOLATION=" + config["isolation"],
            "MESOS_SYSTEMD_ENABLE_SUPPORT=false",
            "MESOS_IMAGE_PROVIDERS=" + config["image_providers"],
            "MESOS_IMAGE_PROVISIONER_BACKEND={0}".format(
                config["image_provisioner_backend"]),
            "MESOS_APPC_STORE_DIR=" + config["appc_store_dir"],
            "MESOS_WORK_DIR=" + config["work_dir"],
            "MESOS_RESOURCES=" + config["resources"],
            "MESOS_ATTRIBUTES=" + attributes,
            "MESOS_MODULES=" + config["modules"],
            "MESOS_RESOURCE_ESTIMATOR=" + config["resource_estimator"],
            "MESOS_OVERSUBSCRIBED_RESOURCES_INTERVAL=" +
            config["oversubscribed_resources_interval"],
            "MESOS_QOS_CONTROLLER=" + config["qos_controller"],
            "MESOS_QOS_CORRECTION_INTERVAL_MIN=" +
            config["qos_correction_interval_min"],
        ],
        image=config["mesos_slave_image"],
        entrypoint="bash /files/run_mesos_slave.sh",
        detach=True,
    )
    cli.start(container=container.get("Id"))
    print_utils.okgreen("started container %s" % agent)
Esempio n. 5
0
 def teardown(self, stop=False):
     """
     Tear down every minicluster component.

     Peloton goes first and is the only step that receives ``stop``;
     mesos, k8s, cassandra and zk follow in that order.
     """
     print_utils.okgreen("teardown started...")
     self._teardown_peloton(stop)
     # The remaining steps take no arguments; each method is looked up
     # right before it is called so the order of effects is unchanged.
     for step in (
         "_teardown_mesos",
         "_teardown_k8s",
         "_teardown_cassandra",
         "_teardown_zk",
     ):
         getattr(self, step)()
     print_utils.okgreen("teardown complete!")
Esempio n. 6
0
def teardown(stop=False):
    """
    Stop or remove every peloton container, then tear down mesos, k8s
    and cassandra.

    Args:
        stop: when true the peloton containers are stopped through
            ``utils.stop_container``; otherwise they are removed through
            ``utils.remove_existing_container``. The cassandra container
            is removed in both cases.
    """
    cleanup = utils.stop_container if stop else utils.remove_existing_container

    # (container name prefix key, instance count key), in teardown order.
    instance_groups = (
        ("peloton_jobmgr_container", "peloton_jobmgr_instance_count"),
        ("peloton_placement_container", "peloton_placement_instances"),
        ("peloton_resmgr_container", "peloton_resmgr_instance_count"),
        ("peloton_hostmgr_container", "peloton_hostmgr_instance_count"),
        ("peloton_archiver_container", "peloton_archiver_instance_count"),
        ("peloton_aurorabridge_container",
         "peloton_aurorabridge_instance_count"),
        ("peloton_apiproxy_container", "peloton_apiproxy_instance_count"),
    )
    for prefix_key, size_key in instance_groups:
        size = config[size_key]
        if size_key == "peloton_placement_instances":
            # placement engines are configured as a collection, not a count
            size = len(size)
        for idx in range(size):
            cleanup(config[prefix_key] + repr(idx))

    minicluster.teardown_mesos(config)
    minicluster.teardown_k8s()

    utils.remove_existing_container(config["cassandra_container"])
    print_utils.okgreen("teardown complete!")
Esempio n. 7
0
def wait_for_up(app, port):
    """
    Poll the health-check endpoint of ``app`` until it answers with 200.

    Args:
        app: application name, used only in log and error messages.
        port: port of the health-check endpoint on ``default_host``.

    Raises:
        Exception: if the endpoint has not returned 200 after
            ``max_retry_attempts`` polls; the message carries the last
            failure seen.
    """
    url = "http://%s:%s/%s" % (default_host, port, healthcheck_path)
    error = ""
    for _ in range(max_retry_attempts):
        try:
            r = requests.get(url)
            if r.status_code == 200:
                print_utils.okgreen("started %s" % app)
                return
            # A non-200 answer is a failed attempt too; without this the
            # loop would spin forever without sleeping or counting.
            error = "unexpected status code %s" % r.status_code
        except Exception as e:
            error = str(e)
        print_utils.warn("app %s is not up yet, retrying..." % app)
        time.sleep(sleep_time_secs)

    # %s for the port so a string port does not break the error message.
    raise Exception("failed to start %s on %s after %d attempts, err: %s" %
                    (app, port, max_retry_attempts, error))
Esempio n. 8
0
 def _setup_mesos_master(self):
     """
     Pull the mesos master image, then create and start the master
     container.

     The master port is published on ``local_master_port``, the
     ``files`` and ``etc_mesos-master`` directories under ``work_dir``
     are bind-mounted, and the master is pointed at the zk container
     through ``MESOS_ZK``.
     """
     cfg = self.config
     self.cli.pull(cfg["mesos_master_image"])

     master_name = cfg["mesos_master_container"]
     exposed_ports = [repr(cfg["master_port"])]
     host_cfg = self.cli.create_host_config(
         port_bindings={cfg["master_port"]: cfg["local_master_port"]},
         binds=[
             work_dir + "/files:/files",
             work_dir + "/mesos_config/etc_mesos-master:/etc/mesos-master",
         ],
         privileged=True,
     )
     env = [
         "MESOS_AUTHENTICATE_HTTP_READWRITE=true",
         "MESOS_AUTHENTICATE_FRAMEWORKS=true",
         # TODO: Enable following flags for fully authentication.
         "MESOS_AUTHENTICATE_HTTP_FRAMEWORKS=true",
         "MESOS_HTTP_FRAMEWORK_AUTHENTICATORS=basic",
         "MESOS_CREDENTIALS=/etc/mesos-master/credentials",
         "MESOS_LOG_DIR=" + cfg["log_dir"],
         "MESOS_PORT=" + repr(cfg["master_port"]),
         "MESOS_ZK=zk://{0}:{1}/mesos".format(
             self.cli.get_container_ip(cfg["zk_container"]),
             cfg["default_zk_port"],
         ),
         "MESOS_QUORUM=" + repr(cfg["quorum"]),
         "MESOS_REGISTRY=" + cfg["registry"],
         "MESOS_WORK_DIR=" + cfg["work_dir"],
     ]
     master = self.cli.create_container(
         name=master_name,
         hostname=master_name,
         volumes=["/files"],
         ports=exposed_ports,
         host_config=host_cfg,
         environment=env,
         image=cfg["mesos_master_image"],
         entrypoint="bash /files/run_mesos_master.sh",
         detach=True,
     )
     self.cli.start(container=master.get("Id"))
     print_utils.okgreen("started container %s" % master_name)
Esempio n. 9
0
    def wait_for_mesos_master_leader(self, timeout_secs=20):
        """
        util method to wait for mesos master leader elected

        Polls the master's ``/state.json`` once per second and returns
        as soon as it answers with HTTP 200.

        Args:
            timeout_secs: how long to keep polling before giving up.

        Raises:
            AssertionError: if no 200 response arrives within
                ``timeout_secs``.
        """

        port = self.config.get("local_master_port")
        url = "{}:{}/state.json".format(utils.HTTP_LOCALHOST, port)
        print_utils.warn("waiting for mesos master leader")
        deadline = time.time() + timeout_secs
        while time.time() < deadline:
            try:
                resp = requests.get(url)
                if resp.status_code == 200:
                    print_utils.okgreen("mesos master is ready")
                    return
            except Exception:
                # master not reachable yet; fall through and retry
                pass
            # Sleep after every failed poll, including the exception path,
            # so an unreachable master is not hammered in a busy loop.
            time.sleep(1)

        # Raised explicitly: an `assert False` is stripped under
        # `python -O`, which would turn a timeout into a silent success.
        raise AssertionError("timed out waiting for mesos master leader")
Esempio n. 10
0
    def wait_for_all_agents_to_register(self, agent_count=3, timeout_secs=300):
        """
        util method to wait for all agents to register

        Polls the master's ``/state.json`` once per second and returns
        once the number of entries in ``slaves`` flagged ``active``
        equals ``agent_count``.

        Args:
            agent_count: number of active agents to wait for.
            timeout_secs: how long to keep polling before giving up.

        On timeout a warning is printed and the method returns without
        raising.
        """
        port = self.config.get("local_master_port")
        url = "{}:{}/state.json".format(utils.HTTP_LOCALHOST, port)
        print_utils.warn("waiting for all mesos agents")

        deadline = time.time() + timeout_secs
        while time.time() < deadline:
            try:
                resp = requests.get(url)
                if resp.status_code == 200:
                    registered_agents = sum(
                        1 for a in resp.json()['slaves'] if a['active']
                    )
                    if registered_agents == agent_count:
                        print_utils.okgreen("all mesos agents are ready")
                        return
            except Exception:
                # master unreachable or payload not usable yet; retry
                pass
            # Sleep after every failed poll, including the exception path,
            # so a failing request does not become a busy loop.
            time.sleep(1)

        # Returning is kept for callers that rely on it, but the timeout
        # is no longer invisible.
        print_utils.warn("timed out waiting for all mesos agents")
Esempio n. 11
0
def teardown():
    """
    Remove every peloton container, then tear down mesos, k8s and the
    cassandra container.
    """
    # (container name prefix key, instance count key), in removal order.
    instance_groups = (
        ("peloton_jobmgr_container", "peloton_jobmgr_instance_count"),
        ("peloton_placement_container", "peloton_placement_instances"),
        ("peloton_resmgr_container", "peloton_resmgr_instance_count"),
        ("peloton_hostmgr_container", "peloton_hostmgr_instance_count"),
        ("peloton_archiver_container", "peloton_archiver_instance_count"),
        ("peloton_aurorabridge_container",
         "peloton_aurorabridge_instance_count"),
    )
    for prefix_key, size_key in instance_groups:
        size = config[size_key]
        if size_key == "peloton_placement_instances":
            # placement engines are configured as a collection, not a count
            size = len(size)
        for idx in range(size):
            utils.remove_existing_container(config[prefix_key] + repr(idx))

    minicluster.teardown_mesos(config)
    minicluster.teardown_k8s()

    utils.remove_existing_container(config["cassandra_container"])
    print_utils.okgreen("teardown complete!")
Esempio n. 12
0
def run_mesos(config):
    """
    Bring up a local mesos cluster: zk, one mesos master and the
    configured agents.

    Existing mesos containers are torn down first. After starting zk
    the function sleeps a fixed 20 seconds before starting the master.
    Regular agents use their index as port offset; exclusive agents
    (``num_exclusive_agents``, default 0) are offset past them.

    Args:
        config: mapping with the image names, container names, ports
            and mesos settings read below.
    """
    # Remove existing containers first.
    teardown_mesos(config)

    # --- zk ---
    zk_image = config["zk_image"]
    cli.pull(zk_image)
    zk_name = config["zk_container"]
    zk_host_cfg = cli.create_host_config(
        port_bindings={config["default_zk_port"]: config["local_zk_port"]},
    )
    zk = cli.create_container(
        name=zk_name,
        hostname=zk_name,
        host_config=zk_host_cfg,
        image=zk_image,
        detach=True,
    )
    cli.start(container=zk.get("Id"))
    print_utils.okgreen("started container %s" % zk_name)

    # TODO: add retry
    print_utils.okblue("sleep 20 secs for zk to come up")
    time.sleep(20)

    # --- mesos master ---
    master_image = config["mesos_master_image"]
    cli.pull(master_image)
    master_name = config["mesos_master_container"]
    master_ports = [repr(config["master_port"])]
    master_host_cfg = cli.create_host_config(
        port_bindings={config["master_port"]: config["master_port"]},
        binds=[
            work_dir + "/files:/files",
            work_dir + "/mesos_config/etc_mesos-master:/etc/mesos-master",
        ],
        privileged=True,
    )
    master_env = [
        "MESOS_AUTHENTICATE_HTTP_READWRITE=true",
        "MESOS_AUTHENTICATE_FRAMEWORKS=true",
        # TODO: Enable following flags for fully authentication.
        "MESOS_AUTHENTICATE_HTTP_FRAMEWORKS=true",
        "MESOS_HTTP_FRAMEWORK_AUTHENTICATORS=basic",
        "MESOS_CREDENTIALS=/etc/mesos-master/credentials",
        "MESOS_LOG_DIR=" + config["log_dir"],
        "MESOS_PORT=" + repr(config["master_port"]),
        "MESOS_ZK=zk://{0}:{1}/mesos".format(
            utils.get_container_ip(config["zk_container"]),
            config["default_zk_port"],
        ),
        "MESOS_QUORUM=" + repr(config["quorum"]),
        "MESOS_REGISTRY=" + config["registry"],
        "MESOS_WORK_DIR=" + config["work_dir"],
    ]
    master = cli.create_container(
        name=master_name,
        hostname=master_name,
        volumes=["/files"],
        ports=master_ports,
        host_config=master_host_cfg,
        environment=master_env,
        image=master_image,
        entrypoint="bash /files/run_mesos_master.sh",
        detach=True,
    )
    cli.start(container=master.get("Id"))
    print_utils.okgreen("started container %s" % master_name)

    # --- mesos slaves ---
    cli.pull(config["mesos_slave_image"])
    for idx in range(config["num_agents"]):
        run_mesos_agent(config, idx, idx)
    for idx in range(config.get("num_exclusive_agents", 0)):
        # exclusive agents take the port offsets after the regular ones
        run_mesos_agent(
            config,
            idx,
            config["num_agents"] + idx,
            is_exclusive=True,
            exclusive_label_value=config.get("exclusive_label_value", ""),
        )
Esempio n. 13
0
 def _setup_k8s(self):
     """
     Recreate the k8s cluster: tear down ``self.k8s``, then create it
     again, logging before and after.
     """
     print_utils.okgreen("starting k8s cluster")
     cluster = self.k8s
     cluster.teardown()
     cluster.create()
     print_utils.okgreen("started k8s cluster")
Esempio n. 14
0
def run_k8s():
    """
    Recreate the kind cluster named ``PELOTON_K8S_NAME``: tear it down,
    then create it again, logging before and after.
    """
    print_utils.okgreen("starting k8s cluster")
    cluster = kind.Kind(PELOTON_K8S_NAME)
    # teardown first so create() starts from a clean slate
    cluster.teardown()
    cluster.create()
    print_utils.okgreen("started k8s cluster")