def _setup_zk(self):
    """Pull, create and start the ZooKeeper container, then wait for it.

    After the container is started, ``utils.is_zk_ready`` is probed on the
    locally bound ZooKeeper port up to ``utils.max_retry_attempts`` times,
    sleeping ``utils.sleep_time_secs`` after every failed probe.

    Raises:
        Exception: if ZooKeeper is not ready once all attempts are used up.
    """
    cfg = self.config
    self.cli.pull(cfg["zk_image"])

    zk_name = cfg["zk_container"]
    host_config = self.cli.create_host_config(
        port_bindings={cfg["default_zk_port"]: cfg["local_zk_port"]},
    )
    created = self.cli.create_container(
        name=zk_name,
        hostname=zk_name,
        host_config=host_config,
        image=cfg["zk_image"],
        detach=True,
    )
    self.cli.start(container=created.get("Id"))
    print_utils.okgreen("started container %s" % zk_name)
    print_utils.okgreen("waiting on %s to be rdy" % zk_name)

    # Bounded polling: return as soon as a probe succeeds.
    for _ in range(utils.max_retry_attempts):
        if utils.is_zk_ready(cfg["local_zk_port"]):
            return
        time.sleep(utils.sleep_time_secs)

    raise Exception("zk failed to come up in time")
def create_cassandra_store(config):
    """Create the Cassandra test keyspace inside the Cassandra container.

    Each attempt sleeps ``utils.sleep_time_secs``, runs
    ``/files/setup_cassandra.sh`` in the container and, if that produced no
    output, runs ``cqlsh -e "describe <keyspace>"`` and looks for the
    ``CREATE KEYSPACE`` statement in the response.

    Args:
        config: mapping that provides ``cassandra_container`` and
            ``cassandra_test_db``.

    Returns:
        None once the keyspace description is found. If it is not found
        after ``utils.max_retry_attempts`` attempts, the process exits with
        status 1.
    """

    def _as_text(output):
        # exec_start may hand back bytes (Python 3) or str (Python 2);
        # normalise so that the comparisons below work on either.
        if isinstance(output, bytes) and not isinstance(output, str):
            return output.decode("utf-8", "replace")
        return output

    retry_attempts = 0
    while retry_attempts < utils.max_retry_attempts:
        time.sleep(utils.sleep_time_secs)
        setup_exe = cli.exec_create(
            container=config["cassandra_container"],
            cmd="/files/setup_cassandra.sh",
        )
        show_exe = cli.exec_create(
            container=config["cassandra_container"],
            cmd='cqlsh -e "describe %s"' % config["cassandra_test_db"],
        )
        # by api design, exec_start needs to be called after exec_create
        # to run 'docker exec'
        resp = _as_text(cli.exec_start(exec_id=setup_exe))
        # Compare by value: `resp is ""` tested object identity, which only
        # worked by accident of string interning.
        if resp == "":
            resp = _as_text(cli.exec_start(exec_id=show_exe))
            # NOTE(review): the keyspace name is hard-coded here while the
            # describe command uses config["cassandra_test_db"] - confirm
            # they are always the same.
            if "CREATE KEYSPACE peloton_test WITH" in resp:
                print_utils.okgreen("cassandra store is created")
                return
        print_utils.warn("failed to create cassandra store, retrying...")
        retry_attempts += 1

    print_utils.fail(
        "Failed to create cassandra store after %d attempts, "
        "aborting..." % utils.max_retry_attempts
    )
    sys.exit(1)
def _setup_cassandra(self):
    """Recreate and start the Cassandra container, then create its store.

    Any existing container with the configured name is removed first. The
    new container publishes the CQL and thrift ports on their configured
    local ports, bind-mounts ``<work_dir>/files`` at ``/files`` and starts
    via ``run_cassandra_with_stratio_index.sh``.
    """
    cfg = self.config
    self.cli.remove_existing_container(cfg["cassandra_container"])
    self.cli.pull(cfg["cassandra_image"])

    cassandra_name = cfg["cassandra_container"]
    published_ports = {
        cfg["cassandra_cql_port"]: cfg["local_cassandra_cql_port"],
        cfg["cassandra_thrift_port"]: cfg["local_cassandra_thrift_port"],
    }
    host_config = self.cli.create_host_config(
        port_bindings=published_ports,
        binds=[work_dir + "/files:/files"],
    )
    created = self.cli.create_container(
        name=cassandra_name,
        hostname=cassandra_name,
        host_config=host_config,
        environment=["MAX_HEAP_SIZE=1G", "HEAP_NEWSIZE=256M"],
        image=cfg["cassandra_image"],
        detach=True,
        entrypoint="bash /files/run_cassandra_with_stratio_index.sh",
    )
    self.cli.start(container=created.get("Id"))
    print_utils.okgreen("started container %s" % cassandra_name)

    self._create_cassandra_store()
def run_mesos_agent(
    config,
    agent_index,
    port_offset,
    is_exclusive=False,
    exclusive_label_value="",
):
    """Create and start one Mesos agent container.

    Args:
        config: mapping with the agent settings read below (container name
            prefix, ports, image, and the values forwarded as ``MESOS_*``
            environment variables).
        agent_index: index appended to the container name.
        port_offset: added to ``config["local_agent_port"]`` to obtain the
            port this agent is published on.
        is_exclusive: when true, the container name gets an ``-exclusive``
            suffix and a ``peloton/exclusive`` attribute is appended to the
            agent attributes.
        exclusive_label_value: value of the ``peloton/exclusive`` attribute;
            only used when ``is_exclusive`` is true.
    """
    prefix = config["mesos_agent_container"]
    attributes = config["attributes"]
    if is_exclusive:
        prefix += "-exclusive"
        attributes += ";peloton/exclusive:" + exclusive_label_value
    agent = prefix + repr(agent_index)
    port = config["local_agent_port"] + port_offset

    container = cli.create_container(
        name=agent,
        hostname=agent,
        volumes=["/files", "/var/run/docker.sock"],
        ports=[repr(config["default_agent_port"])],
        host_config=cli.create_host_config(
            port_bindings={config["default_agent_port"]: port},
            binds=[
                work_dir + "/files:/files",
                work_dir + "/mesos_config/etc_mesos-slave:/etc/mesos-slave",
                # The docker socket is mounted into the agent container.
                "/var/run/docker.sock:/var/run/docker.sock",
            ],
            privileged=True,
        ),
        environment=[
            "MESOS_PORT=" + repr(port),
            "MESOS_MASTER=zk://{0}:{1}/mesos".format(
                utils.get_container_ip(config["zk_container"]),
                config["default_zk_port"],
            ),
            # This entry was garbled in the source; restored to follow the
            # same `+ repr(...)` pattern as MESOS_PORT above.
            "MESOS_SWITCH_USER=" + repr(config["switch_user"]),
            "MESOS_CONTAINERIZERS=" + config["containers"],
            "MESOS_LOG_DIR=" + config["log_dir"],
            "MESOS_ISOLATION=" + config["isolation"],
            "MESOS_SYSTEMD_ENABLE_SUPPORT=false",
            "MESOS_IMAGE_PROVIDERS=" + config["image_providers"],
            "MESOS_IMAGE_PROVISIONER_BACKEND={0}".format(
                config["image_provisioner_backend"]),
            "MESOS_APPC_STORE_DIR=" + config["appc_store_dir"],
            "MESOS_WORK_DIR=" + config["work_dir"],
            "MESOS_RESOURCES=" + config["resources"],
            "MESOS_ATTRIBUTES=" + attributes,
            "MESOS_MODULES=" + config["modules"],
            "MESOS_RESOURCE_ESTIMATOR=" + config["resource_estimator"],
            "MESOS_OVERSUBSCRIBED_RESOURCES_INTERVAL="
            + config["oversubscribed_resources_interval"],
            "MESOS_QOS_CONTROLLER=" + config["qos_controller"],
            "MESOS_QOS_CORRECTION_INTERVAL_MIN="
            + config["qos_correction_interval_min"],
        ],
        image=config["mesos_slave_image"],
        entrypoint="bash /files/run_mesos_slave.sh",
        detach=True,
    )
    cli.start(container=container.get("Id"))
    print_utils.okgreen("started container %s" % agent)
def teardown(self, stop=False):
    """Tear the minicluster down, one component at a time.

    Order: peloton, mesos, k8s, cassandra, zk.

    Args:
        stop: forwarded unchanged to ``_teardown_peloton``.
    """
    print_utils.okgreen("teardown started...")

    self._teardown_peloton(stop)
    # The remaining steps take no arguments; resolve and run them in order.
    for step_name in (
        "_teardown_mesos",
        "_teardown_k8s",
        "_teardown_cassandra",
        "_teardown_zk",
    ):
        getattr(self, step_name)()

    print_utils.okgreen("teardown complete!")
def teardown(stop=False):
    """Stop or remove every peloton container, then tear down the rest.

    Peloton app containers are named ``<container prefix><index>``. After
    them, mesos and k8s are torn down and the cassandra container is
    removed (always removed, regardless of ``stop``).

    Args:
        stop: when true the peloton containers are stopped via
            ``utils.stop_container``; otherwise they are removed via
            ``utils.remove_existing_container``.
    """
    action = utils.stop_container if stop else utils.remove_existing_container

    # (container-name key, size key, size is len() of the config value)
    app_groups = (
        # 1 - jobmgr
        ("peloton_jobmgr_container", "peloton_jobmgr_instance_count", False),
        # 2 - placement engine (one instance per configured entry)
        ("peloton_placement_container", "peloton_placement_instances", True),
        # 3 - resmgr
        ("peloton_resmgr_container", "peloton_resmgr_instance_count", False),
        # 4 - hostmgr
        ("peloton_hostmgr_container", "peloton_hostmgr_instance_count", False),
        # 5 - archiver
        (
            "peloton_archiver_container",
            "peloton_archiver_instance_count",
            False,
        ),
        # 6 - aurorabridge
        (
            "peloton_aurorabridge_container",
            "peloton_aurorabridge_instance_count",
            False,
        ),
        # 7 - apiproxy
        (
            "peloton_apiproxy_container",
            "peloton_apiproxy_instance_count",
            False,
        ),
    )
    for container_key, size_key, sized_by_len in app_groups:
        instances = config[size_key]
        if sized_by_len:
            instances = len(instances)
        for idx in range(instances):
            action(config[container_key] + repr(idx))

    minicluster.teardown_mesos(config)
    minicluster.teardown_k8s()

    utils.remove_existing_container(config["cassandra_container"])
    print_utils.okgreen("teardown complete!")
def wait_for_up(app, port):
    """Poll an app's health-check URL until it answers with HTTP 200.

    The URL is built from ``default_host``, ``port`` and
    ``healthcheck_path``. Up to ``max_retry_attempts`` requests are made,
    sleeping ``sleep_time_secs`` after each unsuccessful one.

    Args:
        app: app name, used only in log and error messages.
        port: port the app's health check is served on.

    Raises:
        Exception: if no request returned 200; the message carries the
            last error seen (exception text or unexpected status code).
    """
    url = "http://%s:%s/%s" % (default_host, port, healthcheck_path)
    error = ""
    for _ in range(max_retry_attempts):
        try:
            r = requests.get(url)
        except Exception as e:
            # Deliberately broad: any failure to reach the app means
            # "not up yet" and is retried.
            error = str(e)
        else:
            if r.status_code == 200:
                print_utils.okgreen("started %s" % app)
                return
            # Remember non-200 answers too, so the final error is not empty.
            error = "unexpected status code %s" % r.status_code
        print_utils.warn("app %s is not up yet, retrying..." % app)
        time.sleep(sleep_time_secs)

    # %s (not %d) for the port: it is formatted with %s in the URL above,
    # and %d would raise TypeError for a string port, hiding the real error.
    raise Exception(
        "failed to start %s on %s after %d attempts, err: %s"
        % (app, port, max_retry_attempts, error)
    )
def _setup_mesos_master(self):
    """Pull the Mesos master image, then create and start its container.

    The master port is published on ``local_master_port``; ``/files`` and
    the master config directory are bind-mounted from ``work_dir``. The
    ZooKeeper address is built from the IP of the zk container.
    """
    cfg = self.config
    self.cli.pull(cfg["mesos_master_image"])

    master_name = cfg["mesos_master_container"]
    exposed_ports = [repr(cfg["master_port"])]
    host_config = self.cli.create_host_config(
        port_bindings={cfg["master_port"]: cfg["local_master_port"]},
        binds=[
            work_dir + "/files:/files",
            work_dir + "/mesos_config/etc_mesos-master:/etc/mesos-master",
        ],
        privileged=True,
    )
    master_env = [
        "MESOS_AUTHENTICATE_HTTP_READWRITE=true",
        "MESOS_AUTHENTICATE_FRAMEWORKS=true",
        # TODO: Enable following flags for full authentication.
        "MESOS_AUTHENTICATE_HTTP_FRAMEWORKS=true",
        "MESOS_HTTP_FRAMEWORK_AUTHENTICATORS=basic",
        "MESOS_CREDENTIALS=/etc/mesos-master/credentials",
        "MESOS_LOG_DIR=" + cfg["log_dir"],
        "MESOS_PORT=" + repr(cfg["master_port"]),
        "MESOS_ZK=zk://{0}:{1}/mesos".format(
            self.cli.get_container_ip(cfg["zk_container"]),
            cfg["default_zk_port"],
        ),
        "MESOS_QUORUM=" + repr(cfg["quorum"]),
        "MESOS_REGISTRY=" + cfg["registry"],
        "MESOS_WORK_DIR=" + cfg["work_dir"],
    ]

    created = self.cli.create_container(
        name=master_name,
        hostname=master_name,
        volumes=["/files"],
        ports=exposed_ports,
        host_config=host_config,
        environment=master_env,
        image=cfg["mesos_master_image"],
        entrypoint="bash /files/run_mesos_master.sh",
        detach=True,
    )
    self.cli.start(container=created.get("Id"))
    print_utils.okgreen(
        "started container %s" % cfg["mesos_master_container"]
    )
def wait_for_mesos_master_leader(self, timeout_secs=20):
    """
    util method to wait for mesos master leader elected

    Polls ``/state.json`` on the local master port once per second until
    it answers with HTTP 200. Only the status code is checked; the
    response body is not inspected.

    Args:
        timeout_secs: how long to keep polling before giving up.

    Raises:
        AssertionError: if no 200 response is received before the deadline.
    """
    port = self.config.get("local_master_port")
    url = "{}:{}/state.json".format(utils.HTTP_LOCALHOST, port)
    print_utils.warn("waiting for mesos master leader")

    last_error = None
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        try:
            resp = requests.get(url)
        except Exception as e:
            # Deliberately broad: the master may simply not be listening
            # yet. Keep the error so a timeout can report it.
            last_error = e
        else:
            if resp.status_code == 200:
                print_utils.okgreen("mesos master is ready")
                return
            last_error = "status code %s" % resp.status_code
        # Sleep on every failed attempt (including exceptions) so a refused
        # connection does not turn this into a busy loop.
        time.sleep(1)

    # Raise explicitly rather than `assert False`, which is stripped when
    # Python runs with -O and would make a timeout look like success.
    raise AssertionError(
        "timed out waiting for mesos master leader, last error: %s"
        % last_error
    )
def wait_for_all_agents_to_register(self, agent_count=3, timeout_secs=300):
    """
    util method to wait for all agents to register

    Polls ``/state.json`` on the local master port once per second and
    counts the entries of ``slaves`` whose ``active`` field is truthy.

    Args:
        agent_count: exact number of active agents to wait for.
        timeout_secs: how long to keep polling before giving up.

    Raises:
        AssertionError: if the active agent count never equals
            ``agent_count`` before the deadline.
    """
    port = self.config.get("local_master_port")
    url = "{}:{}/state.json".format(utils.HTTP_LOCALHOST, port)
    print_utils.warn("waiting for all mesos agents")

    last_error = None
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        try:
            resp = requests.get(url)
            if resp.status_code == 200:
                registered_agents = sum(
                    1 for a in resp.json()['slaves'] if a['active']
                )
                if registered_agents == agent_count:
                    print_utils.okgreen("all mesos agents are ready")
                    return
                last_error = "%d of %d agents active" % (
                    registered_agents, agent_count)
            else:
                last_error = "status code %s" % resp.status_code
        except Exception as e:
            # Deliberately broad: connection failures and malformed state
            # both mean "not ready yet". Keep the error for the report.
            last_error = e
        # Sleep after every unsuccessful attempt, including exceptions, so
        # an unreachable master does not cause a busy loop.
        time.sleep(1)

    # Fail loudly on timeout (as wait_for_mesos_master_leader does) instead
    # of returning as if the agents had registered.
    raise AssertionError(
        "timed out waiting for all mesos agents to register, "
        "last error: %s" % last_error
    )
def teardown():
    """Remove every peloton container, then tear down mesos, k8s, cassandra.

    Peloton app containers are named ``<container prefix><index>`` and are
    removed with ``utils.remove_existing_container``.
    """
    # (container-name key, size key, size is len() of the config value)
    app_groups = (
        # 1 - jobmgr
        ("peloton_jobmgr_container", "peloton_jobmgr_instance_count", False),
        # 2 - placement engine (one instance per configured entry)
        ("peloton_placement_container", "peloton_placement_instances", True),
        # 3 - resmgr
        ("peloton_resmgr_container", "peloton_resmgr_instance_count", False),
        # 4 - hostmgr
        ("peloton_hostmgr_container", "peloton_hostmgr_instance_count", False),
        # 5 - archiver
        (
            "peloton_archiver_container",
            "peloton_archiver_instance_count",
            False,
        ),
        # 6 - aurorabridge
        (
            "peloton_aurorabridge_container",
            "peloton_aurorabridge_instance_count",
            False,
        ),
    )
    for container_key, size_key, sized_by_len in app_groups:
        instances = config[size_key]
        if sized_by_len:
            instances = len(instances)
        for idx in range(instances):
            utils.remove_existing_container(config[container_key] + repr(idx))

    minicluster.teardown_mesos(config)
    minicluster.teardown_k8s()

    utils.remove_existing_container(config["cassandra_container"])
    print_utils.okgreen("teardown complete!")
def run_mesos(config):
    """Bring up zk, the mesos master and all mesos agents as containers.

    Existing mesos containers are torn down first. After zk is started
    the function sleeps a fixed 20 seconds before starting the master.
    Regular agents use indices and port offsets ``0..num_agents-1``;
    exclusive agents restart their index at 0 and use port offsets that
    continue after the regular agents.

    Args:
        config: mapping with the zk, master and agent settings read below.
    """
    # Remove existing containers first.
    teardown_mesos(config)

    # --- zk ---
    cli.pull(config["zk_image"])
    zk_name = config["zk_container"]
    zk_host_config = cli.create_host_config(
        port_bindings={config["default_zk_port"]: config["local_zk_port"]},
    )
    zk = cli.create_container(
        name=zk_name,
        hostname=zk_name,
        host_config=zk_host_config,
        image=config["zk_image"],
        detach=True,
    )
    cli.start(container=zk.get("Id"))
    print_utils.okgreen("started container %s" % zk_name)

    # TODO: add retry
    print_utils.okblue("sleep 20 secs for zk to come up")
    time.sleep(20)

    # --- mesos master ---
    cli.pull(config["mesos_master_image"])
    master_name = config["mesos_master_container"]
    exposed_ports = [repr(config["master_port"])]
    master_host_config = cli.create_host_config(
        port_bindings={config["master_port"]: config["master_port"]},
        binds=[
            work_dir + "/files:/files",
            work_dir + "/mesos_config/etc_mesos-master:/etc/mesos-master",
        ],
        privileged=True,
    )
    master_env = [
        "MESOS_AUTHENTICATE_HTTP_READWRITE=true",
        "MESOS_AUTHENTICATE_FRAMEWORKS=true",
        # TODO: Enable following flags for full authentication.
        "MESOS_AUTHENTICATE_HTTP_FRAMEWORKS=true",
        "MESOS_HTTP_FRAMEWORK_AUTHENTICATORS=basic",
        "MESOS_CREDENTIALS=/etc/mesos-master/credentials",
        "MESOS_LOG_DIR=" + config["log_dir"],
        "MESOS_PORT=" + repr(config["master_port"]),
        "MESOS_ZK=zk://{0}:{1}/mesos".format(
            utils.get_container_ip(config["zk_container"]),
            config["default_zk_port"],
        ),
        "MESOS_QUORUM=" + repr(config["quorum"]),
        "MESOS_REGISTRY=" + config["registry"],
        "MESOS_WORK_DIR=" + config["work_dir"],
    ]
    master = cli.create_container(
        name=master_name,
        hostname=master_name,
        volumes=["/files"],
        ports=exposed_ports,
        host_config=master_host_config,
        environment=master_env,
        image=config["mesos_master_image"],
        entrypoint="bash /files/run_mesos_master.sh",
        detach=True,
    )
    cli.start(container=master.get("Id"))
    print_utils.okgreen(
        "started container %s" % config["mesos_master_container"]
    )

    # --- mesos slaves ---
    cli.pull(config['mesos_slave_image'])
    regular_agents = config['num_agents']
    for idx in range(regular_agents):
        run_mesos_agent(config, idx, idx)

    for idx in range(config.get('num_exclusive_agents', 0)):
        run_mesos_agent(
            config,
            idx,
            regular_agents + idx,
            is_exclusive=True,
            exclusive_label_value=config.get('exclusive_label_value', ''),
        )
def _setup_k8s(self):
    """Recreate the k8s cluster held in ``self.k8s``.

    Calls ``teardown()`` and then ``create()`` on it, logging before and
    after.
    """
    print_utils.okgreen("starting k8s cluster")

    cluster = self.k8s
    # Tear down first, then create.
    cluster.teardown()
    cluster.create()

    print_utils.okgreen("started k8s cluster")
def run_k8s():
    """Recreate the k8s cluster named ``PELOTON_K8S_NAME``.

    Builds a ``kind.Kind`` handle for that name, then calls ``teardown()``
    followed by ``create()`` on it, logging before and after.
    """
    print_utils.okgreen("starting k8s cluster")

    cluster = kind.Kind(PELOTON_K8S_NAME)
    # Tear down first, then create.
    cluster.teardown()
    cluster.create()

    print_utils.okgreen("started k8s cluster")