Example #1
0
    def setUp(self):
        """Create the fake driver/ZK client, a sample offer and the initial scheduler state."""
        self._driver = FakeDriver()
        self._storage = FakeStorage(SequentialThreadingHandler())
        self._zk_client = FakeClient(storage=self._storage)
        self._zk_client.start()

        framework_id = mesos_pb2.FrameworkID()
        framework_id.value = "framework_id_0"
        self._framework_id = framework_id

        # A single offer carrying 4 cpus, 512 * 3 of mem and three ports.
        offer = mesos_pb2.Offer()
        offer.id.value = "offer_id_0"
        offer.framework_id.value = framework_id.value
        offer.slave_id.value = "slave_id_0"
        offer.hostname = "localhost"
        offer.resources.extend(
            create_resources(cpus=4, mem=512 * 3, ports={10000, 10001, 10002}))
        self._offer = offer

        self._framework_user = "******"
        self._zk_url = "zk://host/mysos/test"
        self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

        # LocalStateProvider is pointed at a fresh temporary directory.
        state_dir = tempfile.mkdtemp()
        self._tmpdir = state_dir
        self._state_provider = LocalStateProvider(state_dir)

        self._state = Scheduler(
            mesos_pb2.FrameworkInfo(user=getpass.getuser(), name="mysos", checkpoint=False))
Example #2
0
  def setUp(self):
    """Prepare the fakes, one resource offer and a fresh scheduler state for each test."""
    storage = FakeStorage(SequentialThreadingHandler())
    self._driver = FakeDriver()
    self._storage = storage
    self._zk_client = FakeClient(storage=storage)
    self._zk_client.start()

    self._framework_id = mesos_pb2.FrameworkID()
    self._framework_id.value = "framework_id_0"

    # The offer is attributed to the framework ID created above.
    sample_offer = mesos_pb2.Offer()
    sample_offer.id.value = "offer_id_0"
    sample_offer.framework_id.value = self._framework_id.value
    sample_offer.slave_id.value = "slave_id_0"
    sample_offer.hostname = "localhost"
    sample_offer.resources.extend(
        create_resources(cpus=4, mem=512 * 3, ports={10000, 10001, 10002}))
    self._offer = sample_offer

    self._framework_user = "******"
    self._zk_url = "zk://host/mysos/test"
    self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

    # The temporary directory path is kept on the instance as `_tmpdir`.
    self._tmpdir = tempfile.mkdtemp()
    self._state_provider = LocalStateProvider(self._tmpdir)

    info = mesos_pb2.FrameworkInfo(user=getpass.getuser(), name="mysos", checkpoint=False)
    self._state = Scheduler(info)
Example #3
0
    def setup(self, request):
        """Build the launcher under test with the state provider selected by `request.param`.

        `request.param` is compared against LocalStateProvider and ZooKeeperStateProvider;
        cleanup is registered through `request.addfinalizer`.
        """
        self._driver = FakeDriver()
        self._storage = FakeStorage(SequentialThreadingHandler())
        self._zk_client = FakeClient(storage=self._storage)
        self._zk_client.start()

        offer = mesos_pb2.Offer()
        offer.id.value = "offer_id_0"
        offer.framework_id.value = "framework_id_0"
        offer.slave_id.value = "slave_id_0"
        offer.hostname = "localhost"
        # Enough memory and ports to fit three tasks.
        offer.resources.extend(
            create_resources(cpus=4, mem=512 * 3, ports={10000, 10001, 10002}))
        self._offer = offer

        self._framework_user = "******"

        # Some tests use the default launcher; some don't.
        self._zk_url = "zk://host/mysos/test"
        self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

        # Construct the state provider based on the test parameter.
        provider_type = request.param
        if provider_type == LocalStateProvider:
            scratch_dir = tempfile.mkdtemp()
            self._state_provider = LocalStateProvider(scratch_dir)

            def remove_scratch_dir():
                # Clean up after ourselves.
                shutil.rmtree(scratch_dir, True)

            request.addfinalizer(remove_scratch_dir)
        elif provider_type == ZooKeeperStateProvider:
            self._state_provider = ZooKeeperStateProvider(self._zk_client, "/mysos/test")

        short_query_interval = Amount(150, Time.MILLISECONDS)  # Short interval.
        self._launcher = MySQLClusterLauncher(
            self._driver,
            self._cluster,
            self._state_provider,
            self._zk_url,
            self._zk_client,
            self._framework_user,
            "./executor.pex",
            "cmd.sh",
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml",
            query_interval=short_query_interval)

        self._elected = threading.Event()
        self._launchers = [self._launcher]  # See teardown().

        request.addfinalizer(self.teardown)
Example #4
0
def test_scheduler_runs():
    """
    Verifies that the scheduler successfully launches 3 "no-op" MySQL tasks.

    NOTE: Due to the limitation of zake the scheduler's ZK operations are not propagated to
    executors in separate processes but they are unit-tested separately.
    """
    import mesos.native

    # Make sure fake_mysos_executor.pex is available to be fetched by Mesos slave.
    assert os.path.isfile('dist/fake_mysos_executor.pex')

    storage = FakeStorage(SequentialThreadingHandler())
    zk_client = FakeClient(storage=storage)
    zk_client.start()

    zk_url = "zk://fake_host/home/mysos/clusters"
    cluster_name = "test_cluster"
    num_nodes = 3

    state_provider = LocalStateProvider(safe_mkdtemp())

    framework_info = FrameworkInfo(user=getpass.getuser(),
                                   name="mysos",
                                   checkpoint=False)

    state = Scheduler(framework_info)

    scheduler = MysosScheduler(state, state_provider, getpass.getuser(),
                               os.path.abspath("dist/fake_mysos_executor.pex"),
                               "./fake_mysos_executor.pex", zk_client, zk_url,
                               Amount(40, Time.SECONDS), "/fakepath",
                               gen_encryption_key())

    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler, framework_info, "local")
    scheduler_driver.start()

    try:
        # Wait until the scheduler is connected and becomes available.
        assert scheduler.connected.wait(30)

        scheduler.create_cluster(cluster_name, "mysql_user", num_nodes)

        # A slave is promoted to be the master.
        deadline(
            lambda: wait_for_master(
                get_cluster_path(posixpath.join(zk_url, 'discover'), cluster_name),
                zk_client), Amount(40, Time.SECONDS))
    finally:
        # Stop the driver even when a check above fails or times out, so a failing run does not
        # leave the started driver behind.
        stop_status = scheduler_driver.stop()

    assert stop_status == DRIVER_STOPPED
Example #5
0
class TestState(unittest.TestCase):
  def setUp(self):
    self._tmpdir = tempfile.mkdtemp()
    self._state_provider = LocalStateProvider(self._tmpdir)

  def tearDown(self):
    shutil.rmtree(self._tmpdir, True)

  def test_scheduler_state(self):
    expected = Scheduler(FrameworkInfo(
        user='******',
        name='test_fw_name',
        checkpoint=True))
    expected.clusters.add('cluster2')
    expected.clusters.add('cluster1')

    self._state_provider.dump_scheduler_state(expected)
    actual = self._state_provider.load_scheduler_state()

    assert expected.framework_info == actual.framework_info
    assert expected.clusters == actual.clusters

  def test_cluster_state(self):
    password_box = PasswordBox(gen_encryption_key())

    expected = MySQLCluster(
        'cluster1',
        'cluster_user',
        password_box.encrypt('cluster_password'),
        3,
        DEFAULT_TASK_CPUS,
        DEFAULT_TASK_MEM,
        DEFAULT_TASK_DISK)

    expected.tasks['task1'] = MySQLTask(
        'cluster1', 'task1', 'slave1', 'host1', 10000)

    self._state_provider.dump_cluster_state(expected)
    actual = self._state_provider.load_cluster_state('cluster1')

    assert expected.user == actual.user
    assert isinstance(actual.num_nodes, int)
    assert expected.num_nodes == actual.num_nodes
    assert len(expected.tasks) == len(actual.tasks)
    assert expected.tasks['task1'].port == actual.tasks['task1'].port
    assert expected.encrypted_password == actual.encrypted_password
    assert password_box.match('cluster_password', actual.encrypted_password)
Example #6
0
class TestState(unittest.TestCase):
    def setUp(self):
        self._tmpdir = tempfile.mkdtemp()
        self._state_provider = LocalStateProvider(self._tmpdir)

    def tearDown(self):
        shutil.rmtree(self._tmpdir, True)

    def test_scheduler_state(self):
        expected = Scheduler(
            FrameworkInfo(user='******',
                          name='test_fw_name',
                          checkpoint=True))
        expected.clusters.add('cluster2')
        expected.clusters.add('cluster1')

        self._state_provider.dump_scheduler_state(expected)
        actual = self._state_provider.load_scheduler_state()

        assert expected.framework_info == actual.framework_info
        assert expected.clusters == actual.clusters

    def test_cluster_state(self):
        password_box = PasswordBox(gen_encryption_key())

        expected = MySQLCluster('cluster1', 'cluster_user',
                                password_box.encrypt('cluster_password'), 3,
                                DEFAULT_TASK_CPUS, DEFAULT_TASK_MEM,
                                DEFAULT_TASK_DISK)

        expected.tasks['task1'] = MySQLTask('cluster1', 'task1', 'slave1',
                                            'host1', 10000)

        self._state_provider.dump_cluster_state(expected)
        actual = self._state_provider.load_cluster_state('cluster1')

        assert expected.user == actual.user
        assert isinstance(actual.num_nodes, int)
        assert expected.num_nodes == actual.num_nodes
        assert len(expected.tasks) == len(actual.tasks)
        assert expected.tasks['task1'].port == actual.tasks['task1'].port
        assert expected.encrypted_password == actual.encrypted_password
        assert password_box.match('cluster_password',
                                  actual.encrypted_password)
Example #7
0
 def setUp(self):
     """Create a temporary directory and a LocalStateProvider constructed with its path."""
     scratch_dir = tempfile.mkdtemp()
     self._tmpdir = scratch_dir
     self._state_provider = LocalStateProvider(scratch_dir)
Example #8
0
class TestScheduler(unittest.TestCase):
    def setUp(self):
        self._driver = FakeDriver()
        self._storage = FakeStorage(SequentialThreadingHandler())
        self._zk_client = FakeClient(storage=self._storage)
        self._zk_client.start()

        self._framework_id = mesos_pb2.FrameworkID()
        self._framework_id.value = "framework_id_0"

        self._offer = mesos_pb2.Offer()
        self._offer.id.value = "offer_id_0"
        self._offer.framework_id.value = self._framework_id.value
        self._offer.slave_id.value = "slave_id_0"
        self._offer.hostname = "localhost"

        resources = create_resources(cpus=4,
                                     mem=512 * 3,
                                     ports=set([10000, 10001, 10002]))
        self._offer.resources.extend(resources)

        self._framework_user = "******"

        self._zk_url = "zk://host/mysos/test"
        self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

        self._tmpdir = tempfile.mkdtemp()
        self._state_provider = LocalStateProvider(self._tmpdir)

        framework_info = mesos_pb2.FrameworkInfo(user=getpass.getuser(),
                                                 name="mysos",
                                                 checkpoint=False)
        self._state = Scheduler(framework_info)

    def tearDown(self):
        shutil.rmtree(self._tmpdir, True)  # Clean up after ourselves.

    def test_scheduler_recovery(self):
        scheduler1 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml")
        scheduler1.registered(self._driver, self._framework_id, object())
        scheduler1.create_cluster("cluster1", "mysql_user", 3)
        scheduler1.resourceOffers(self._driver, [self._offer])

        # One task is launched for one offer.
        assert len(scheduler1._launchers["cluster1"]._cluster.tasks) == 1

        with pytest.raises(MysosScheduler.ClusterExists):
            scheduler1.create_cluster("cluster1", "mysql_user", 3)

        # FrameworkID should have been persisted.
        self._state = self._state_provider.load_scheduler_state()
        assert self._state.framework_info.id.value == self._framework_id.value

        # Simulate restart.
        scheduler2 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml")

        # Scheduler always receives registered() with the same FrameworkID after failover.
        scheduler2.registered(self._driver, self._framework_id, object())

        assert len(scheduler2._launchers) == 1
        assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

        # Scheduler has recovered the cluster so it doesn't accept another of the same name.
        with pytest.raises(MysosScheduler.ClusterExists):
            scheduler2.create_cluster("cluster1", "mysql_user", 3)

    def test_scheduler_recovery_failure_before_launch(self):
        scheduler1 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml")
        scheduler1.registered(self._driver, self._framework_id, object())
        scheduler1.create_cluster("cluster1", "mysql_user", 3)

        # Simulate restart before the task is successfully launched.
        scheduler2 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml")

        assert len(scheduler2._launchers) == 0  # No launchers are recovered.

        # Scheduler always receives registered() with the same FrameworkID after failover.
        scheduler2.registered(self._driver, self._framework_id, object())

        assert len(scheduler2._launchers) == 1
        assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

        # Now offer the resources for this task.
        scheduler2.resourceOffers(self._driver, [self._offer])

        # One task is launched for the offer.
        assert len(
            scheduler2._launchers["cluster1"]._cluster.active_tasks) == 1

        # Scheduler has recovered the cluster so it doesn't accept another of the same name.
        with pytest.raises(MysosScheduler.ClusterExists):
            scheduler2.create_cluster("cluster1", "mysql_user", 3)

    def test_incompatible_resource_role(self):
        scheduler1 = MysosScheduler(
            self._state,
            self._state_provider,
            self._framework_user,
            "./executor.pex",
            "cmd.sh",
            self._zk_client,
            self._zk_url,
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml",
            framework_role='mysos'
        )  # Require 'mysos' but the resources are in '*'.
        scheduler1.registered(self._driver, self._framework_id, object())
        scheduler1.create_cluster("cluster1", "mysql_user", 3)
        scheduler1.resourceOffers(self._driver, [self._offer])

        assert "declineOffer" in self._driver.method_calls
        assert len(self._driver.method_calls["declineOffer"]) == 1
        # [0][0][1]: [First declineOffer call][The positional args][The first positional arg], which is
        # a 'Filters' object.
        assert (self._driver.method_calls["declineOffer"][0][0][1].
                refuse_seconds == INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION.as_(
                    Time.SECONDS))
Example #9
0
class TestScheduler(unittest.TestCase):
    def setUp(self):
        self._driver = FakeDriver()
        self._storage = FakeStorage(SequentialThreadingHandler())
        self._zk_client = FakeClient(storage=self._storage)
        self._zk_client.start()

        self._framework_id = mesos_pb2.FrameworkID()
        self._framework_id.value = "framework_id_0"

        self._offer = mesos_pb2.Offer()
        self._offer.id.value = "offer_id_0"
        self._offer.framework_id.value = self._framework_id.value
        self._offer.slave_id.value = "slave_id_0"
        self._offer.hostname = "localhost"

        resources = create_resources(cpus=DEFAULT_TASK_CPUS * 3,
                                     mem=DEFAULT_TASK_MEM * 3,
                                     disk=DEFAULT_TASK_DISK * 3,
                                     ports=set([10000, 10001, 10002]))
        self._offer.resources.extend(resources)

        self._framework_user = "******"

        self._zk_url = "zk://host/mysos/test"
        self._cluster = MySQLCluster("cluster0", "user", "pass", 3,
                                     DEFAULT_TASK_CPUS, DEFAULT_TASK_MEM,
                                     DEFAULT_TASK_DISK)

        self._tmpdir = tempfile.mkdtemp()
        self._state_provider = LocalStateProvider(self._tmpdir)

        framework_info = mesos_pb2.FrameworkInfo(user=getpass.getuser(),
                                                 name="mysos",
                                                 checkpoint=False)
        self._state = Scheduler(framework_info)

    def tearDown(self):
        shutil.rmtree(self._tmpdir, True)  # Clean up after ourselves.

    def test_scheduler_recovery(self):
        scheduler_key = gen_encryption_key()

        scheduler1 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml",
                                    scheduler_key)
        scheduler1.registered(self._driver, self._framework_id, object())
        scheduler1.create_cluster("cluster1", "mysql_user", 3)
        scheduler1.resourceOffers(self._driver, [self._offer])

        # One task is launched for one offer.
        assert len(scheduler1._launchers["cluster1"]._cluster.tasks) == 1

        with pytest.raises(MysosScheduler.ClusterExists):
            scheduler1.create_cluster("cluster1", "mysql_user", 3)

        # FrameworkID should have been persisted.
        self._state = self._state_provider.load_scheduler_state()
        assert self._state.framework_info.id.value == self._framework_id.value

        # Simulate restart.
        scheduler2 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml",
                                    scheduler_key)

        # Scheduler always receives registered() with the same FrameworkID after failover.
        scheduler2.registered(self._driver, self._framework_id, object())

        assert len(scheduler2._launchers) == 1
        assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

        # Scheduler has recovered the cluster so it doesn't accept another of the same name.
        with pytest.raises(MysosScheduler.ClusterExists):
            scheduler2.create_cluster("cluster1", "mysql_user", 3)

    def test_scheduler_recovery_failure_before_launch(self):
        scheduler_key = gen_encryption_key()

        scheduler1 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml",
                                    scheduler_key)
        scheduler1.registered(self._driver, self._framework_id, object())
        _, password = scheduler1.create_cluster("cluster1", "mysql_user", 3)

        # Simulate restart before the task is successfully launched.
        scheduler2 = MysosScheduler(self._state, self._state_provider,
                                    self._framework_user, "./executor.pex",
                                    "cmd.sh", self._zk_client, self._zk_url,
                                    Amount(5, Time.SECONDS),
                                    "/etc/mysos/admin_keyfile.yml",
                                    scheduler_key)

        assert len(scheduler2._launchers) == 0  # No launchers are recovered.

        # Scheduler always receives registered() with the same FrameworkID after failover.
        scheduler2.registered(self._driver, self._framework_id, object())

        assert len(scheduler2._launchers) == 1
        assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

        password_box = PasswordBox(scheduler_key)

        assert password_box.match(
            password,
            scheduler2._launchers["cluster1"]._cluster.encrypted_password)

        # Now offer the resources for this task.
        scheduler2.resourceOffers(self._driver, [self._offer])

        # One task is launched for the offer.
        assert len(
            scheduler2._launchers["cluster1"]._cluster.active_tasks) == 1

        # Scheduler has recovered the cluster so it doesn't accept another of the same name.
        with pytest.raises(MysosScheduler.ClusterExists):
            scheduler2.create_cluster("cluster1", "mysql_user", 3)

    def test_incompatible_resource_role(self):
        scheduler1 = MysosScheduler(
            self._state,
            self._state_provider,
            self._framework_user,
            "./executor.pex",
            "cmd.sh",
            self._zk_client,
            self._zk_url,
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml",
            gen_encryption_key(),
            framework_role='mysos'
        )  # Require 'mysos' but the resources are in '*'.
        scheduler1.registered(self._driver, self._framework_id, object())
        scheduler1.create_cluster("cluster1", "mysql_user", 3)
        scheduler1.resourceOffers(self._driver, [self._offer])

        assert "declineOffer" in self._driver.method_calls
        assert len(self._driver.method_calls["declineOffer"]) == 1
        # [0][0][1]: [First declineOffer call][The positional args][The first positional arg], which is
        # a 'Filters' object.
        assert (self._driver.method_calls["declineOffer"][0][0][1].
                refuse_seconds == INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION.as_(
                    Time.SECONDS))

    def test_scheduler_metrics(self):
        scheduler_key = gen_encryption_key()

        scheduler = MysosScheduler(self._state, self._state_provider,
                                   self._framework_user, "./executor.pex",
                                   "cmd.sh", self._zk_client, self._zk_url,
                                   Amount(5, Time.SECONDS),
                                   "/etc/mysos/admin_keyfile.yml",
                                   scheduler_key)

        RootMetrics().register_observable('scheduler', scheduler)

        scheduler.registered(self._driver, self._framework_id, object())
        scheduler.create_cluster("cluster1",
                                 "mysql_user",
                                 3,
                                 cluster_password='******')

        sample = RootMetrics().sample()
        assert sample['scheduler.cluster_count'] == 1
        assert sample[
            'scheduler.total_requested_mem_mb'] == DEFAULT_TASK_MEM.as_(
                Data.MB) * 3
        assert sample[
            'scheduler.total_requested_disk_mb'] == DEFAULT_TASK_DISK.as_(
                Data.MB) * 3
        assert sample[
            'scheduler.total_requested_cpus'] == DEFAULT_TASK_CPUS * 3

        scheduler.delete_cluster("cluster1", 'test_password')

        sample = RootMetrics().sample()
        assert sample['scheduler.cluster_count'] == 0
        assert sample['scheduler.total_requested_mem_mb'] == 0
        assert sample['scheduler.total_requested_disk_mb'] == 0
        assert sample['scheduler.total_requested_cpus'] == 0

    def test_scheduler_delete_empty_cluster(self):
        scheduler_key = gen_encryption_key()

        scheduler = MysosScheduler(self._state, self._state_provider,
                                   self._framework_user, "./executor.pex",
                                   "cmd.sh", self._zk_client, self._zk_url,
                                   Amount(5, Time.SECONDS),
                                   "/etc/mysos/admin_keyfile.yml",
                                   scheduler_key)

        scheduler.registered(self._driver, self._framework_id, object())
        _, password = scheduler.create_cluster("cluster1", "mysql_user", 3)

        assert len(scheduler._launchers) == 1

        # Deleting the cluster before any offer comes in for launching any task.
        scheduler.delete_cluster("cluster1", password)

        assert len(scheduler._launchers) == 0
Example #10
0
 def setUp(self):
   """Create a temporary directory and a LocalStateProvider constructed with its path."""
   scratch_dir = tempfile.mkdtemp()
   self._tmpdir = scratch_dir
   self._state_provider = LocalStateProvider(scratch_dir)
Example #11
0
class TestScheduler(unittest.TestCase):
  def setUp(self):
    self._driver = FakeDriver()
    self._storage = FakeStorage(SequentialThreadingHandler())
    self._zk_client = FakeClient(storage=self._storage)
    self._zk_client.start()

    self._framework_id = mesos_pb2.FrameworkID()
    self._framework_id.value = "framework_id_0"

    self._offer = mesos_pb2.Offer()
    self._offer.id.value = "offer_id_0"
    self._offer.framework_id.value = self._framework_id.value
    self._offer.slave_id.value = "slave_id_0"
    self._offer.hostname = "localhost"

    resources = create_resources(cpus=4, mem=512 * 3, ports=set([10000, 10001, 10002]))
    self._offer.resources.extend(resources)

    self._framework_user = "******"

    self._zk_url = "zk://host/mysos/test"
    self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

    self._tmpdir = tempfile.mkdtemp()
    self._state_provider = LocalStateProvider(self._tmpdir)

    framework_info = mesos_pb2.FrameworkInfo(
        user=getpass.getuser(),
        name="mysos",
        checkpoint=False)
    self._state = Scheduler(framework_info)

  def tearDown(self):
    shutil.rmtree(self._tmpdir, True)  # Clean up after ourselves.

  def test_scheduler_recovery(self):
    scheduler1 = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")
    scheduler1.registered(self._driver, self._framework_id, object())
    scheduler1.create_cluster("cluster1", "mysql_user", 3)
    scheduler1.resourceOffers(self._driver, [self._offer])

    # One task is launched for one offer.
    assert len(scheduler1._launchers["cluster1"]._cluster.tasks) == 1

    with pytest.raises(MysosScheduler.ClusterExists):
      scheduler1.create_cluster("cluster1", "mysql_user", 3)

    # FrameworkID should have been persisted.
    self._state = self._state_provider.load_scheduler_state()
    assert self._state.framework_info.id.value == self._framework_id.value

    # Simulate restart.
    scheduler2 = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")

    # Scheduler always receives registered() with the same FrameworkID after failover.
    scheduler2.registered(self._driver, self._framework_id, object())

    assert len(scheduler2._launchers) == 1
    assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
      scheduler2.create_cluster("cluster1", "mysql_user", 3)

  def test_scheduler_recovery_failure_before_launch(self):
    scheduler1 = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")
    scheduler1.registered(self._driver, self._framework_id, object())
    scheduler1.create_cluster("cluster1", "mysql_user", 3)

    # Simulate restart before the task is successfully launched.
    scheduler2 = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml")

    assert len(scheduler2._launchers) == 0  # No launchers are recovered.

    # Scheduler always receives registered() with the same FrameworkID after failover.
    scheduler2.registered(self._driver, self._framework_id, object())

    assert len(scheduler2._launchers) == 1
    assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

    # Now offer the resources for this task.
    scheduler2.resourceOffers(self._driver, [self._offer])

    # One task is launched for the offer.
    assert len(scheduler2._launchers["cluster1"]._cluster.active_tasks) == 1

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
      scheduler2.create_cluster("cluster1", "mysql_user", 3)

  def test_incompatible_resource_role(self):
    scheduler1 = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        framework_role='mysos')  # Require 'mysos' but the resources are in '*'.
    scheduler1.registered(self._driver, self._framework_id, object())
    scheduler1.create_cluster("cluster1", "mysql_user", 3)
    scheduler1.resourceOffers(self._driver, [self._offer])

    assert "declineOffer" in self._driver.method_calls
    assert len(self._driver.method_calls["declineOffer"]) == 1
    # [0][0][1]: [First declineOffer call][The positional args][The first positional arg], which is
    # a 'Filters' object.
    assert (self._driver.method_calls["declineOffer"][0][0][1].refuse_seconds ==
        INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION.as_(Time.SECONDS))
Example #12
0
class TestScheduler(unittest.TestCase):
  def setUp(self):
    # Builds the fixtures shared by the tests: a fake driver, a fake ZooKeeper client, one
    # resource offer sized for three default tasks, and a fresh scheduler state backed by a
    # temporary directory.
    self._driver = FakeDriver()
    self._storage = FakeStorage(SequentialThreadingHandler())
    self._zk_client = FakeClient(storage=self._storage)
    self._zk_client.start()

    framework_id = mesos_pb2.FrameworkID()
    framework_id.value = "framework_id_0"
    self._framework_id = framework_id

    # A single offer on "localhost" carrying three tasks' worth of cpus/mem/disk and three ports.
    offer = mesos_pb2.Offer()
    offer.id.value = "offer_id_0"
    offer.framework_id.value = framework_id.value
    offer.slave_id.value = "slave_id_0"
    offer.hostname = "localhost"
    offer.resources.extend(
        create_resources(
            cpus=DEFAULT_TASK_CPUS * 3,
            mem=DEFAULT_TASK_MEM * 3,
            disk=DEFAULT_TASK_DISK * 3,
            ports={10000, 10001, 10002}))
    self._offer = offer

    self._framework_user = "******"
    self._zk_url = "zk://host/mysos/test"

    self._cluster = MySQLCluster(
        "cluster0",
        "user",
        "pass",
        3,
        DEFAULT_TASK_CPUS,
        DEFAULT_TASK_MEM,
        DEFAULT_TASK_DISK)

    # Scheduler state is persisted under a temp dir that tearDown() removes.
    self._tmpdir = tempfile.mkdtemp()
    self._state_provider = LocalStateProvider(self._tmpdir)

    self._state = Scheduler(
        mesos_pb2.FrameworkInfo(user=getpass.getuser(), name="mysos", checkpoint=False))

  def tearDown(self):
    shutil.rmtree(self._tmpdir, True)  # Clean up after ourselves.

  def test_scheduler_recovery(self):
    # A scheduler restarted after launching a task recovers the persisted FrameworkID and the
    # existing cluster, and keeps rejecting a duplicate cluster name.
    scheduler_key = gen_encryption_key()

    def new_scheduler():
      # Reads self._state at call time so the "restarted" scheduler gets the reloaded state.
      return MysosScheduler(
          self._state,
          self._state_provider,
          self._framework_user,
          "./executor.pex",
          "cmd.sh",
          self._zk_client,
          self._zk_url,
          Amount(5, Time.SECONDS),
          "/etc/mysos/admin_keyfile.yml",
          scheduler_key)

    scheduler1 = new_scheduler()
    scheduler1.registered(self._driver, self._framework_id, object())
    scheduler1.create_cluster("cluster1", "mysql_user", 3)
    scheduler1.resourceOffers(self._driver, [self._offer])

    # One task is launched for one offer.
    launcher1 = scheduler1._launchers["cluster1"]
    assert len(launcher1._cluster.tasks) == 1

    # The name is already taken on the original scheduler.
    with pytest.raises(MysosScheduler.ClusterExists):
      scheduler1.create_cluster("cluster1", "mysql_user", 3)

    # FrameworkID should have been persisted.
    self._state = self._state_provider.load_scheduler_state()
    assert self._state.framework_info.id.value == self._framework_id.value

    # Simulate restart.
    scheduler2 = new_scheduler()

    # Scheduler always receives registered() with the same FrameworkID after failover.
    scheduler2.registered(self._driver, self._framework_id, object())

    assert len(scheduler2._launchers) == 1
    launcher2 = scheduler2._launchers["cluster1"]
    assert launcher2.cluster_name == "cluster1"

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
      scheduler2.create_cluster("cluster1", "mysql_user", 3)

  def test_scheduler_recovery_failure_before_launch(self):
    # A scheduler restarted after a cluster was created but before any task was launched
    # recovers the cluster (including its encrypted password) and can then launch the task.
    scheduler_key = gen_encryption_key()

    def new_scheduler():
      # Both the original and the "restarted" scheduler share the same construction arguments.
      return MysosScheduler(
          self._state,
          self._state_provider,
          self._framework_user,
          "./executor.pex",
          "cmd.sh",
          self._zk_client,
          self._zk_url,
          Amount(5, Time.SECONDS),
          "/etc/mysos/admin_keyfile.yml",
          scheduler_key)

    scheduler1 = new_scheduler()
    scheduler1.registered(self._driver, self._framework_id, object())
    _, password = scheduler1.create_cluster("cluster1", "mysql_user", 3)

    # Simulate restart before the task is successfully launched.
    scheduler2 = new_scheduler()

    # No launchers are recovered.
    assert len(scheduler2._launchers) == 0

    # Scheduler always receives registered() with the same FrameworkID after failover.
    scheduler2.registered(self._driver, self._framework_id, object())

    assert len(scheduler2._launchers) == 1
    assert scheduler2._launchers["cluster1"].cluster_name == "cluster1"

    # The recovered cluster's encrypted password must match the one returned at creation.
    password_box = PasswordBox(scheduler_key)
    recovered_cluster = scheduler2._launchers["cluster1"]._cluster
    assert password_box.match(password, recovered_cluster.encrypted_password)

    # Now offer the resources for this task.
    scheduler2.resourceOffers(self._driver, [self._offer])

    # One task is launched for the offer.
    assert len(scheduler2._launchers["cluster1"]._cluster.active_tasks) == 1

    # Scheduler has recovered the cluster so it doesn't accept another of the same name.
    with pytest.raises(MysosScheduler.ClusterExists):
      scheduler2.create_cluster("cluster1", "mysql_user", 3)

  def test_incompatible_resource_role(self):
    # An offer whose resources are in a different role than the one the framework requires
    # is declined with the incompatible-role refuse duration, and the decline is counted in
    # the 'scheduler.offers_incompatible_role' metric.
    scheduler_args = (
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        gen_encryption_key())

    # Require 'mysos' but the resources are in '*'.
    scheduler1 = MysosScheduler(*scheduler_args, framework_role='mysos')

    # Expose the scheduler's metrics under the 'scheduler' prefix so they can be sampled below.
    RootMetrics().register_observable('scheduler', scheduler1)

    scheduler1.registered(self._driver, self._framework_id, object())
    scheduler1.create_cluster("cluster1", "mysql_user", 3)
    scheduler1.resourceOffers(self._driver, [self._offer])

    # Exactly one declineOffer call must have been recorded by the fake driver.
    assert "declineOffer" in self._driver.method_calls
    decline_calls = self._driver.method_calls["declineOffer"]
    assert len(decline_calls) == 1

    # Each recorded call starts with its positional args; the element at index 1 of those is
    # the 'Filters' object handed to declineOffer.
    positional_args = decline_calls[0][0]
    filters = positional_args[1]
    assert filters.refuse_seconds == INCOMPATIBLE_ROLE_OFFER_REFUSE_DURATION.as_(Time.SECONDS)

    metrics_sample = RootMetrics().sample()
    assert metrics_sample['scheduler.offers_incompatible_role'] == 1

  def test_scheduler_metrics(self):
    # Walks a cluster through its lifecycle (register -> create -> offer/launch -> task
    # failure -> delete) and checks the scheduler metrics sampled from RootMetrics after
    # each step.
    scheduler_key = gen_encryption_key()

    # The same password must be used to create and to delete the cluster; keeping it in one
    # place prevents the two calls from drifting apart.
    # NOTE(review): assumes delete_cluster() validates the password supplied at creation.
    cluster_password = 'test_password'

    scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        scheduler_key)

    # Expose the scheduler's metrics under the 'scheduler' prefix.
    RootMetrics().register_observable('scheduler', scheduler)

    scheduler.registered(self._driver, self._framework_id, object())

    sample = RootMetrics().sample()
    assert sample['scheduler.framework_registered'] == 1

    scheduler.create_cluster(
        "cluster1", "mysql_user", 3, cluster_password=cluster_password)

    # A three-node cluster requests three times the default per-task resources.
    sample = RootMetrics().sample()
    assert sample['scheduler.cluster_count'] == 1
    assert sample['scheduler.total_requested_mem_mb'] == DEFAULT_TASK_MEM.as_(Data.MB) * 3
    assert sample['scheduler.total_requested_disk_mb'] == DEFAULT_TASK_DISK.as_(Data.MB) * 3
    assert sample['scheduler.total_requested_cpus'] == DEFAULT_TASK_CPUS * 3

    scheduler.resourceOffers(self._driver, [self._offer])
    sample = RootMetrics().sample()
    assert sample['scheduler.resource_offers'] == 1
    assert sample['scheduler.tasks_launched'] == 1

    # Report the launched task as running and then as failed.
    status = mesos_pb2.TaskStatus()
    status.state = mesos_pb2.TASK_RUNNING
    status.slave_id.value = self._offer.slave_id.value
    status.task_id.value = 'mysos-cluster1-0'

    scheduler.statusUpdate(self._driver, status)

    status.state = mesos_pb2.TASK_FAILED
    scheduler.statusUpdate(self._driver, status)

    sample = RootMetrics().sample()
    assert sample['scheduler.tasks_failed'] == 1

    scheduler.delete_cluster("cluster1", cluster_password)

    # Deleting the only cluster brings the cluster count and requested resources back to zero.
    sample = RootMetrics().sample()
    assert sample['scheduler.cluster_count'] == 0
    assert sample['scheduler.total_requested_mem_mb'] == 0
    assert sample['scheduler.total_requested_disk_mb'] == 0
    assert sample['scheduler.total_requested_cpus'] == 0

  def test_scheduler_delete_empty_cluster(self):
    # A cluster that has been created but has not yet received any offer (so no task has been
    # launched for it) can be deleted, which removes its launcher from the scheduler.
    scheduler_key = gen_encryption_key()

    scheduler = MysosScheduler(
        self._state,
        self._state_provider,
        self._framework_user,
        "./executor.pex",
        "cmd.sh",
        self._zk_client,
        self._zk_url,
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        scheduler_key)

    scheduler.registered(self._driver, self._framework_id, object())
    # Keep the password returned by create_cluster(); it is passed back to delete_cluster().
    _, password = scheduler.create_cluster("cluster1", "mysql_user", 3)

    # Creating the cluster registers exactly one launcher.
    assert len(scheduler._launchers) == 1

    # Deleting the cluster before any offer comes in for launching any task.
    scheduler.delete_cluster("cluster1", password)

    # The launcher is gone once the cluster is deleted.
    assert len(scheduler._launchers) == 0