def test_scheduler_runs():
    """
    Verifies that the scheduler successfully launches 3 "no-op" MySQL tasks.

    NOTE: Due to the limitation of zake the scheduler's ZK operations are not propagated to
    executors in separate processes but they are unit-tested separately.

    The scheduler driver is stopped in a ``finally`` clause so that a failed assertion or an
    expired deadline does not leave the driver running after this test returns.
    """
    import mesos.native

    # Make sure fake_mysos_executor.pex is available to be fetched by Mesos slave.
    assert os.path.isfile('dist/fake_mysos_executor.pex')

    storage = FakeStorage(SequentialThreadingHandler())
    zk_client = FakeClient(storage=storage)
    zk_client.start()

    zk_url = "zk://fake_host/home/mysos/clusters"
    cluster_name = "test_cluster"
    num_nodes = 3

    state_provider = LocalStateProvider(safe_mkdtemp())

    framework_info = FrameworkInfo(
        user=getpass.getuser(),
        name="mysos",
        checkpoint=False)

    state = Scheduler(framework_info)

    scheduler = MysosScheduler(
        state,
        state_provider,
        getpass.getuser(),
        os.path.abspath("dist/fake_mysos_executor.pex"),
        "./fake_mysos_executor.pex",
        zk_client,
        zk_url,
        Amount(40, Time.SECONDS),
        "/fakepath",
        gen_encryption_key())

    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler,
        framework_info,
        "local")
    scheduler_driver.start()

    try:
        # Wait until the scheduler is connected and becomes available.
        assert scheduler.connected.wait(30)

        scheduler.create_cluster(cluster_name, "mysql_user", num_nodes)

        # A slave is promoted to be the master.
        deadline(
            lambda: wait_for_master(
                get_cluster_path(posixpath.join(zk_url, 'discover'), cluster_name),
                zk_client),
            Amount(40, Time.SECONDS))
    finally:
        # Stop the driver on every path; on failure the original exception still propagates.
        stop_status = scheduler_driver.stop()

    assert stop_status == DRIVER_STOPPED
def test_incompatible_resource_role(self):
    """Check that an offer is declined when the scheduler is created with framework_role='mysos'.

    Exactly one declineOffer call is expected, and its 'Filters' argument must carry the
    INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION refuse time.
    """
    # Require 'mysos' but the resources are in '*'.
    role_scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        gen_encryption_key(),
        framework_role='mysos')

    role_scheduler.registered(self._driver, self._framework_id, object())
    role_scheduler.create_cluster("cluster1", "mysql_user", 3)
    role_scheduler.resourceOffers(self._driver, [self._offer])

    assert "declineOffer" in self._driver.method_calls
    decline_calls = self._driver.method_calls["declineOffer"]
    assert len(decline_calls) == 1

    # [0][0][1]: [First declineOffer call][The positional args][The first positional arg],
    # which is a 'Filters' object.
    refuse_seconds = decline_calls[0][0][1].refuse_seconds
    expected_seconds = INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION.as_(Time.SECONDS)
    assert refuse_seconds == expected_seconds
def test_scheduler_runs():
    """
    Verifies that the scheduler successfully launches 3 "no-op" MySQL tasks.

    NOTE: Due to the limitation of zake the scheduler's ZK operations are not propagated to
    executors in separate processes but they are unit-tested separately.

    Once the driver has been started it is always stopped again, even when one of the
    intermediate checks fails, so a failing run does not leave the driver running.
    """
    import mesos.native

    # Make sure fake_mysos_executor.pex is available to be fetched by Mesos slave.
    assert os.path.isfile('dist/fake_mysos_executor.pex')

    storage = FakeStorage(SequentialThreadingHandler())
    zk_client = FakeClient(storage=storage)
    zk_client.start()

    zk_url = "zk://fake_host/home/mysos/clusters"
    cluster_name = "test_cluster"
    num_nodes = 3

    state_provider = LocalStateProvider(safe_mkdtemp())

    framework_info = FrameworkInfo(user=getpass.getuser(), name="mysos", checkpoint=False)

    state = Scheduler(framework_info)

    scheduler = MysosScheduler(
        state,
        state_provider,
        getpass.getuser(),
        os.path.abspath("dist/fake_mysos_executor.pex"),
        "./fake_mysos_executor.pex",
        zk_client,
        zk_url,
        Amount(40, Time.SECONDS),
        "/fakepath",
        gen_encryption_key())

    scheduler_driver = mesos.native.MesosSchedulerDriver(scheduler, framework_info, "local")
    scheduler_driver.start()

    try:
        # Wait until the scheduler is connected and becomes available.
        assert scheduler.connected.wait(30)

        scheduler.create_cluster(cluster_name, "mysql_user", num_nodes)

        # A slave is promoted to be the master.
        discover_path = get_cluster_path(posixpath.join(zk_url, 'discover'), cluster_name)
        deadline(
            lambda: wait_for_master(discover_path, zk_client),
            Amount(40, Time.SECONDS))
    finally:
        # Runs on success and on failure; a pending exception is re-raised after the stop.
        driver_status = scheduler_driver.stop()

    assert driver_status == DRIVER_STOPPED
def test_create_cluster_invalid_user(self):
    """Check that posting a cluster while the scheduler raises InvalidUser gives a 400 AppError."""
    self._scheduler.set_exception(MysosScheduler.InvalidUser())

    request_params = {
        'num_nodes': 3,
        'cluster_user': '******'
    }

    with pytest.raises(AppError) as exc_info:
        self._app.post('/clusters/test_cluster', request_params)

    # Inspect the captured error only after the 'with' block has finished.
    assert exc_info.value.message.startswith('Bad response: 400')
def test_incompatible_resource_role(self):
    """Check that an offer is declined, and counted, for a scheduler with framework_role='mysos'.

    Besides the single declineOffer call and its refuse duration, the
    'scheduler.offers_incompatible_role' metric must read 1 afterwards.
    """
    # Require 'mysos' but the resources are in '*'.
    role_scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        gen_encryption_key(),
        framework_role='mysos')

    RootMetrics().register_observable('scheduler', role_scheduler)

    role_scheduler.registered(self._driver, self._framework_id, object())
    role_scheduler.create_cluster("cluster1", "mysql_user", 3)
    role_scheduler.resourceOffers(self._driver, [self._offer])

    assert "declineOffer" in self._driver.method_calls
    decline_calls = self._driver.method_calls["declineOffer"]
    assert len(decline_calls) == 1

    # [0][0][1]: [First declineOffer call][The positional args][The first positional arg],
    # which is a 'Filters' object.
    refuse_seconds = decline_calls[0][0][1].refuse_seconds
    expected_seconds = INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION.as_(Time.SECONDS)
    assert refuse_seconds == expected_seconds

    metrics = RootMetrics().sample()
    assert metrics['scheduler.offers_incompatible_role'] == 1
def test_scheduler_delete_empty_cluster(self):
    """Check that a cluster can be deleted before any offer arrives, leaving no launcher."""
    encryption_key = gen_encryption_key()

    scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        encryption_key)

    scheduler.registered(self._driver, self._framework_id, object())

    # Only the second element of the create_cluster() result (the password) is needed.
    _, cluster_password = scheduler.create_cluster("cluster1", "mysql_user", 3)
    assert len(scheduler._launchers) == 1

    # Deleting the cluster before any offer comes in for launching any task.
    scheduler.delete_cluster("cluster1", cluster_password)
    assert len(scheduler._launchers) == 0
def test_scheduler_metrics(self):
    """Check the cluster-level metrics after creating and after deleting a cluster.

    After creating a 3-node cluster the sample must show one cluster and three times the
    default per-task mem/disk/cpus; after deleting it all four metrics must read zero.

    The same password value is passed to create_cluster() and delete_cluster(); previously
    the two calls used different literals.
    """
    # Single source for the password used by both the create and the delete call.
    cluster_password = 'test_password'

    scheduler_key = gen_encryption_key()

    scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        scheduler_key)

    RootMetrics().register_observable('scheduler', scheduler)

    scheduler.registered(self._driver, self._framework_id, object())

    scheduler.create_cluster(
        "cluster1",
        "mysql_user",
        3,
        cluster_password=cluster_password)

    sample = RootMetrics().sample()
    assert sample['scheduler.cluster_count'] == 1
    assert sample['scheduler.total_requested_mem_mb'] == DEFAULT_TASK_MEM.as_(Data.MB) * 3
    assert sample['scheduler.total_requested_disk_mb'] == DEFAULT_TASK_DISK.as_(Data.MB) * 3
    assert sample['scheduler.total_requested_cpus'] == DEFAULT_TASK_CPUS * 3

    scheduler.delete_cluster("cluster1", cluster_password)

    sample = RootMetrics().sample()
    assert sample['scheduler.cluster_count'] == 0
    assert sample['scheduler.total_requested_mem_mb'] == 0
    assert sample['scheduler.total_requested_disk_mb'] == 0
    assert sample['scheduler.total_requested_cpus'] == 0
def test_scheduler_delete_empty_cluster(self):
    """Create a cluster and delete it straight away; the launcher count must go from 1 to 0."""
    key = gen_encryption_key()
    empty_cluster_scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        key)
    empty_cluster_scheduler.registered(self._driver, self._framework_id, object())

    # create_cluster() yields a pair; its second item is the password needed for deletion.
    _, generated_password = empty_cluster_scheduler.create_cluster("cluster1", "mysql_user", 3)

    assert len(empty_cluster_scheduler._launchers) == 1

    # Deleting the cluster before any offer comes in for launching any task.
    empty_cluster_scheduler.delete_cluster("cluster1", generated_password)

    assert len(empty_cluster_scheduler._launchers) == 0
def test_scheduler_recovery(self):
    """Check that a second scheduler built from the persisted state recovers 'cluster1'.

    The first scheduler creates the cluster and launches one task; the state is then
    reloaded from the state provider and handed to a second scheduler, which must expose
    the same launcher and reject creating a cluster with the same name.
    """

    def build_scheduler():
        # Reads self._state at call time, so the second call sees the reloaded state.
        return MysosScheduler(
            self._state,
            self._state_provider,
            self._framework_user,
            "./executor.pex",
            "cmd.sh",
            self._zk_client,
            self._zk_url,
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml")

    original = build_scheduler()
    original.registered(self._driver, self._framework_id, object())
    original.create_cluster("cluster1", "mysql_user", 3)
    original.resourceOffers(self._driver, [self._offer])

    # One task is launched for one offer.
    assert len(original._launchers["cluster1"]._cluster.tasks) == 1

    with pytest.raises(MysosScheduler.ClusterExists):
        original.create_cluster("cluster1", "mysql_user", 3)

    # FrameworkID should have been persisted.
    self._state = self._state_provider.load_scheduler_state()
    assert self._state.framework_info.id.value == self._framework_id.value

    # Simulate restart.
    restarted = build_scheduler()

    # Scheduler always receives registered() with the same FrameworkID after failover.
    restarted.registered(self._driver, self._framework_id, object())

    assert len(restarted._launchers) == 1
    assert restarted._launchers["cluster1"].cluster_name == "cluster1"

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
        restarted.create_cluster("cluster1", "mysql_user", 3)
def test_scheduler_recovery_failure_before_launch(self):
    """Check recovery when the scheduler is replaced before any task has been launched.

    The replacement scheduler starts with no launchers, gains the 'cluster1' launcher once
    registered() is called, launches one task for the next offer and rejects a duplicate
    cluster name.
    """

    def build_scheduler():
        # Both schedulers are constructed from the same attributes of this test instance.
        return MysosScheduler(
            self._state,
            self._state_provider,
            self._framework_user,
            "./executor.pex",
            "cmd.sh",
            self._zk_client,
            self._zk_url,
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml")

    original = build_scheduler()
    original.registered(self._driver, self._framework_id, object())
    original.create_cluster("cluster1", "mysql_user", 3)

    # Simulate restart before the task is successfully launched.
    restarted = build_scheduler()

    assert len(restarted._launchers) == 0  # No launchers are recovered.

    # Scheduler always receives registered() with the same FrameworkID after failover.
    restarted.registered(self._driver, self._framework_id, object())

    assert len(restarted._launchers) == 1
    assert restarted._launchers["cluster1"].cluster_name == "cluster1"

    # Now offer the resources for this task.
    restarted.resourceOffers(self._driver, [self._offer])

    # One task is launched for the offer.
    recovered_cluster = restarted._launchers["cluster1"]._cluster
    assert len(recovered_cluster.active_tasks) == 1

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
        restarted.create_cluster("cluster1", "mysql_user", 3)
def test_scheduler_recovery(self):
    """Run a scheduler, reload the persisted state, and verify a new scheduler recovers it.

    Checks, in order: one task launched for one offer, duplicate cluster names rejected,
    the FrameworkID present in the reloaded state, and the 'cluster1' launcher visible on
    the new scheduler after registered().
    """
    cluster_name = "cluster1"

    before_restart = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")
    before_restart.registered(self._driver, self._framework_id, object())
    before_restart.create_cluster(cluster_name, "mysql_user", 3)
    before_restart.resourceOffers(self._driver, [self._offer])

    # One task is launched for one offer.
    launched_tasks = before_restart._launchers[cluster_name]._cluster.tasks
    assert len(launched_tasks) == 1

    with pytest.raises(MysosScheduler.ClusterExists):
        before_restart.create_cluster(cluster_name, "mysql_user", 3)

    # FrameworkID should have been persisted.
    self._state = self._state_provider.load_scheduler_state()
    assert self._state.framework_info.id.value == self._framework_id.value

    # Simulate restart.
    after_restart = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")

    # Scheduler always receives registered() with the same FrameworkID after failover.
    after_restart.registered(self._driver, self._framework_id, object())

    assert len(after_restart._launchers) == 1
    assert after_restart._launchers[cluster_name].cluster_name == "cluster1"

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
        after_restart.create_cluster(cluster_name, "mysql_user", 3)
def test_scheduler_recovery_failure_before_launch(self):
    """Replace the scheduler before any offer is processed and verify the cluster recovers.

    The new scheduler has no launchers until registered() is called; afterwards it holds
    the 'cluster1' launcher, launches one task for an offer and rejects a duplicate name.
    """
    cluster_name = "cluster1"

    before_restart = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")
    before_restart.registered(self._driver, self._framework_id, object())
    before_restart.create_cluster(cluster_name, "mysql_user", 3)

    # Simulate restart before the task is successfully launched.
    after_restart = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")

    # No launchers are recovered.
    assert len(after_restart._launchers) == 0

    # Scheduler always receives registered() with the same FrameworkID after failover.
    after_restart.registered(self._driver, self._framework_id, object())

    assert len(after_restart._launchers) == 1
    assert after_restart._launchers[cluster_name].cluster_name == "cluster1"

    # Now offer the resources for this task.
    after_restart.resourceOffers(self._driver, [self._offer])

    # One task is launched for the offer.
    active_tasks = after_restart._launchers[cluster_name]._cluster.active_tasks
    assert len(active_tasks) == 1

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
        after_restart.create_cluster(cluster_name, "mysql_user", 3)
def test_scheduler_metrics(self):
    """Walk a cluster through its lifecycle and check the scheduler metrics at each step.

    Steps checked: framework registration, cluster creation (cluster count and requested
    mem/disk/cpus for 3 nodes), a resource offer (offers and launched tasks), a
    TASK_RUNNING followed by a TASK_FAILED status update (failed tasks), and cluster
    deletion (cluster-level metrics back to zero).

    The same password value is passed to create_cluster() and delete_cluster(); previously
    the two calls used different literals.
    """
    # Single source for the password used by both the create and the delete call.
    cluster_password = 'test_password'

    scheduler_key = gen_encryption_key()

    scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        scheduler_key)

    RootMetrics().register_observable('scheduler', scheduler)

    scheduler.registered(self._driver, self._framework_id, object())

    sample = RootMetrics().sample()
    assert sample['scheduler.framework_registered'] == 1

    scheduler.create_cluster(
        "cluster1",
        "mysql_user",
        3,
        cluster_password=cluster_password)

    sample = RootMetrics().sample()
    assert sample['scheduler.cluster_count'] == 1
    assert sample['scheduler.total_requested_mem_mb'] == DEFAULT_TASK_MEM.as_(Data.MB) * 3
    assert sample['scheduler.total_requested_disk_mb'] == DEFAULT_TASK_DISK.as_(Data.MB) * 3
    assert sample['scheduler.total_requested_cpus'] == DEFAULT_TASK_CPUS * 3

    scheduler.resourceOffers(self._driver, [self._offer])

    sample = RootMetrics().sample()
    assert sample['scheduler.resource_offers'] == 1
    assert sample['scheduler.tasks_launched'] == 1

    # Report the task as running first, then reuse the same status object to report failure.
    status = mesos_pb2.TaskStatus()
    status.state = mesos_pb2.TASK_RUNNING
    status.slave_id.value = self._offer.slave_id.value
    status.task_id.value = 'mysos-cluster1-0'
    scheduler.statusUpdate(self._driver, status)

    status.state = mesos_pb2.TASK_FAILED
    scheduler.statusUpdate(self._driver, status)

    sample = RootMetrics().sample()
    assert sample['scheduler.tasks_failed'] == 1

    scheduler.delete_cluster("cluster1", cluster_password)

    sample = RootMetrics().sample()
    assert sample['scheduler.cluster_count'] == 0
    assert sample['scheduler.total_requested_mem_mb'] == 0
    assert sample['scheduler.total_requested_disk_mb'] == 0
    assert sample['scheduler.total_requested_cpus'] == 0