Beispiel #1
0
def test_parse_errors():
  """parse() rejects a URL without the 'zk://' prefix but does not validate the other fields."""
  with pytest.raises(ValueError) as e:
    parse("host1:port1")
  # Compare str(exception) instead of the '.message' attribute: BaseException.message was
  # deprecated in Python 2.6 and no longer exists in Python 3.
  assert str(e.value) == "Expecting 'zk://' at the beginning of the URL"

  # This method doesn't validate the values in the tuple.
  assert parse("zk://") == (None, "", "/")
  assert parse("zk://host_no_port") == (None, "host_no_port", "/")
  assert parse("zk://jake@host") == ("jake", "host", "/")
def test_parse_errors():
    """parse() rejects a URL without the 'zk://' prefix but does not validate the other fields."""
    with pytest.raises(ValueError) as e:
        parse("host1:port1")
    # Compare str(exception) instead of the '.message' attribute: BaseException.message was
    # deprecated in Python 2.6 and no longer exists in Python 3.
    assert str(e.value) == "Expecting 'zk://' at the beginning of the URL"

    # This method doesn't validate the values in the tuple.
    assert parse("zk://") == (None, "", "/")
    assert parse("zk://host_no_port") == (None, "host_no_port", "/")
    assert parse("zk://jake@host") == ("jake", "host", "/")
Beispiel #3
0
def resolve_master(
      cluster_url, master_callback=lambda: True, termination_callback=lambda: True, zk_client=None):
  """
    Resolve the MySQL cluster master's endpoint from the given URL for this cluster.

    A ClusterListener is started on the cluster's ZooKeeper path and reports through the two
    callbacks; this function itself returns nothing.

    :param cluster_url: The ZooKeeper URL for this cluster.
    :param master_callback: A callback method with one argument: the ServiceInstance for the elected
                            master.
    :param termination_callback: A callback method with no argument. Invoked when the cluster
                                 terminates.
    :param zk_client: Use a custom ZK client instead of Kazoo if specified.
    :raises ValueError: If 'cluster_url' cannot be parsed.
  """
  # NOTE(review): the default 'master_callback' accepts no arguments although it is documented
  # above as receiving the elected master's ServiceInstance -- confirm how ClusterListener calls it.
  try:
    _, zk_servers, cluster_path = zookeeper.parse(cluster_url)
  except Exception as e:
    # Use str(e) rather than 'e.message': the attribute was deprecated in Python 2.6 and is gone in
    # Python 3, where reading it would mask the real parse error with an AttributeError.
    raise ValueError("Invalid cluster_url: %s" % str(e))

  if not zk_client:
    # No client supplied: create and start a Kazoo client for the servers named in the URL.
    zk_client = KazooClient(zk_servers)
    zk_client.start()

  listener = ClusterListener(
      zk_client,
      cluster_path,
      None,
      master_callback=master_callback,
      termination_callback=termination_callback)
  listener.start()
  def from_task(self, task, sandbox):
    """
      Build a MysosTaskRunner for the given task.

      :param task: The task whose JSON-encoded 'data' carries the 'cluster', 'host', 'port' and
                   'zk_url' fields.
      :param sandbox: Handed to the task control, installer and backup store providers and to the
                      StateManager.
      :return: A MysosTaskRunner that uses a started Kazoo client.
      :raises TaskError: If the task control or the package installer cannot be created.
    """
    data = json.loads(task.data)
    cluster_name, host, port, zk_url = data['cluster'], data['host'], data['port'], data['zk_url']
    _, servers, path = parse(zk_url)
    kazoo = KazooClient(servers)
    kazoo.start()
    self_instance = ServiceInstance(Endpoint(host, port))

    try:
      task_control = self._task_control_provider.from_task(task, sandbox)
      installer = self._installer_provider.from_task(task, sandbox)
      backup_store = self._backup_store_provider.from_task(task, sandbox)
    except (TaskControl.Error, PackageInstaller.Error) as e:
      kazoo.stop()  # Kazoo needs to be cleaned up. See kazoo/issues/217.
      # str(e) instead of 'e.message': the attribute was deprecated in Python 2.6 and removed in
      # Python 3.
      raise TaskError(str(e))

    state_manager = StateManager(sandbox, backup_store)

    return MysosTaskRunner(
        self_instance,
        kazoo,
        get_cluster_path(path, cluster_name),
        installer,
        task_control,
        state_manager)
Beispiel #5
0
def resolve_master(cluster_url,
                   master_callback=lambda: True,
                   termination_callback=lambda: True,
                   zk_client=None):
    """
    Resolve the MySQL cluster master's endpoint from the given URL for this cluster.

    A ClusterListener is started on the cluster's ZooKeeper path and reports through the two
    callbacks; this function itself returns nothing.

    :param cluster_url: The ZooKeeper URL for this cluster.
    :param master_callback: A callback method with one argument: the ServiceInstance for the elected
                            master.
    :param termination_callback: A callback method with no argument. Invoked when the cluster
                                 terminates.
    :param zk_client: Use a custom ZK client instead of Kazoo if specified.
    :raises ValueError: If 'cluster_url' cannot be parsed.
    """
    # NOTE(review): the default 'master_callback' accepts no arguments although it is documented
    # above as receiving the elected master's ServiceInstance -- confirm how ClusterListener
    # calls it.
    try:
        _, zk_servers, cluster_path = zookeeper.parse(cluster_url)
    except Exception as e:
        # Use str(e) rather than 'e.message': the attribute was deprecated in Python 2.6 and is
        # gone in Python 3, where reading it would mask the real parse error with an
        # AttributeError.
        raise ValueError("Invalid cluster_url: %s" % str(e))

    if not zk_client:
        # No client supplied: create and start a Kazoo client for the servers named in the URL.
        zk_client = KazooClient(zk_servers)
        zk_client.start()

    listener = ClusterListener(zk_client,
                               cluster_path,
                               None,
                               master_callback=master_callback,
                               termination_callback=termination_callback)
    listener.start()
Beispiel #6
0
def test_parse():
  """parse() splits a ZooKeeper URL into (credentials, servers, path); the path defaults to '/'."""
  # The credentials in each URL must be the literal 'jake:1' that the expected tuples contain;
  # a masked placeholder such as '*****:*****' can never satisfy these assertions.
  assert parse("zk://jake:1@host1:port1") == ("jake:1", "host1:port1", "/")
  assert parse("zk://jake:1@host1:port1/") == ("jake:1", "host1:port1", "/")
  assert (parse("zk://jake:1@host1:port1,host2:port2") ==
          ("jake:1", "host1:port1,host2:port2", "/"))
  assert (parse("zk://jake:1@host1:port1,host2:port2/") ==
          ("jake:1", "host1:port1,host2:port2", "/"))
  assert (parse("zk://jake:1@host1:port1,host2:port2/path/to/znode") ==
          ("jake:1", "host1:port1,host2:port2", "/path/to/znode"))
def test_parse():
    """parse() splits a ZooKeeper URL into (credentials, servers, path); path defaults to '/'."""
    # The credentials in each URL must be the literal 'jake:1' that the expected tuples contain;
    # a masked placeholder such as '*****:*****' can never satisfy these assertions.
    assert parse("zk://jake:1@host1:port1") == ("jake:1", "host1:port1", "/")
    assert parse("zk://jake:1@host1:port1/") == ("jake:1", "host1:port1", "/")
    assert (parse("zk://jake:1@host1:port1,host2:port2") == (
        "jake:1", "host1:port1,host2:port2", "/"))
    assert (parse("zk://jake:1@host1:port1,host2:port2/") == (
        "jake:1", "host1:port1,host2:port2", "/"))
    assert (parse("zk://jake:1@host1:port1,host2:port2/path/to/znode") == (
        "jake:1", "host1:port1,host2:port2", "/path/to/znode"))
Beispiel #8
0
    def from_task(self, task, sandbox):
        """
        Build a MysosTaskRunner that uses a FakeClient in place of a real ZooKeeper client.

        :param task: The task whose JSON-encoded 'data' carries the 'cluster', 'port' and 'zk_url'
                     fields.
        :param sandbox: Handed to the task control provider.
        :return: A MysosTaskRunner wired with a started FakeClient, a NoopPackageInstaller and a
                 Fake() instance as its last argument.
        """
        task_data = json.loads(task.data)
        cluster_name = task_data['cluster']
        port = task_data['port']
        zk_url = task_data['zk_url']

        # Only the path component of the ZooKeeper URL is used below.
        _, _zk_servers, zk_path = zookeeper.parse(zk_url)

        fake_zk = FakeClient()
        fake_zk.start()

        # Advertise this host's own resolved address together with the task's port.
        self_instance = ServiceInstance(
            Endpoint(socket.gethostbyname(socket.gethostname()), port))
        task_control = self._task_control_provider.from_task(task, sandbox)

        return MysosTaskRunner(
            self_instance,
            fake_zk,
            posixpath.join(zk_path, cluster_name),
            NoopPackageInstaller(),
            task_control,
            Fake())
Beispiel #9
0
  def from_task(self, task, sandbox):
    """
      Build a MysosTaskRunner that uses a FakeClient in place of a real ZooKeeper client.

      :param task: The task whose JSON-encoded 'data' carries the 'cluster', 'port' and 'zk_url'
                   fields.
      :param sandbox: Handed to the task control provider.
      :return: A MysosTaskRunner wired with a started FakeClient, a NoopPackageInstaller and a
               Fake() instance as its last argument.
    """
    task_data = json.loads(task.data)
    cluster_name = task_data['cluster']
    port = task_data['port']
    zk_url = task_data['zk_url']

    # Only the path component of the ZooKeeper URL is used below.
    _, _zk_servers, zk_path = zookeeper.parse(zk_url)

    fake_zk = FakeClient()
    fake_zk.start()

    # Advertise this host's own resolved address together with the task's port.
    self_instance = ServiceInstance(Endpoint(socket.gethostbyname(socket.gethostname()), port))
    task_control = self._task_control_provider.from_task(task, sandbox)

    return MysosTaskRunner(
        self_instance,
        fake_zk,
        posixpath.join(zk_path, cluster_name),
        NoopPackageInstaller(),
        task_control,
        Fake())
Beispiel #10
0
    def from_task(self, task, sandbox):
        """
        Build a MysosTaskRunner for the given task.

        :param task: The task whose JSON-encoded 'data' carries the 'cluster', 'host', 'port' and
                     'zk_url' fields.
        :param sandbox: Handed to the task control, installer and backup store providers and to
                        the StateManager.
        :return: A MysosTaskRunner that uses a started Kazoo client.
        :raises TaskError: If the task control or the package installer cannot be created.
        """
        data = json.loads(task.data)
        cluster_name, host, port, zk_url = data['cluster'], data['host'], data[
            'port'], data['zk_url']
        _, servers, path = parse(zk_url)
        kazoo = KazooClient(servers)
        kazoo.start()
        self_instance = ServiceInstance(Endpoint(host, port))

        try:
            task_control = self._task_control_provider.from_task(task, sandbox)
            installer = self._installer_provider.from_task(task, sandbox)
            backup_store = self._backup_store_provider.from_task(task, sandbox)
        except (TaskControl.Error, PackageInstaller.Error) as e:
            # The client was started above; stop it so it is not left running once we give up on
            # this task.
            kazoo.stop()
            # str(e) instead of 'e.message': the attribute was deprecated in Python 2.6 and
            # removed in Python 3.
            raise TaskError(str(e))

        state_manager = StateManager(sandbox, backup_store)

        return MysosTaskRunner(self_instance, kazoo,
                               get_cluster_path(path, cluster_name), installer,
                               task_control, state_manager)
Beispiel #11
0
    def main(args, options):
        """
        Run the Mysos scheduler until it stops or the process is interrupted.

        Checks the required options, restores the persisted scheduler state (or creates and
        persists a fresh one), starts the Mesos scheduler driver and the HTTP API server, then
        waits on the scheduler's 'stopped' event.

        :param args: Not used in this function.
        :param options: Option values; the attributes read are the ones named in the checks and
                        calls below.
        """
        log.info("Options in use: %s", options)

        # NOTE(review): the code below relies on app.error() not returning (names such as
        # 'election_timeout' and 'zk_servers' would otherwise be unbound) -- confirm.
        if not options.api_port:
            app.error('Must specify --port')

        if not options.mesos_master:
            app.error('Must specify --mesos_master')

        if not options.framework_user:
            app.error('Must specify --framework_user')

        if not options.executor_uri:
            app.error('Must specify --executor_uri')

        if not options.executor_cmd:
            app.error('Must specify --executor_cmd')

        if not options.zk_url:
            app.error('Must specify --zk_url')

        if not options.admin_keypath:
            app.error('Must specify --admin_keypath')

        # Exceptions are reported with str(e): the '.message' attribute was deprecated in
        # Python 2.6 and removed in Python 3.
        try:
            election_timeout = parse_time(options.election_timeout)
            framework_failover_timeout = parse_time(
                options.framework_failover_timeout)
        except InvalidTime as e:
            app.error(str(e))

        try:
            _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
        except Exception as e:
            app.error("Invalid --zk_url: %s" % str(e))

        web_assets_dir = os.path.join(options.work_dir, "web")
        pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
        log.info("Extracted web assets into %s" % options.work_dir)

        fw_principal = None
        fw_secret = None
        if options.framework_authentication_file:
            try:
                with open(options.framework_authentication_file, "r") as f:
                    # safe_load() only builds plain YAML types, which is all a principal/secret
                    # mapping needs. yaml.load() without an explicit Loader can construct
                    # arbitrary Python objects and is rejected by PyYAML >= 6.
                    cred = yaml.safe_load(f)
                fw_principal = cred["principal"]
                fw_secret = cred["secret"]
                log.info(
                    "Loaded credential (principal=%s) for framework authentication"
                    % fw_principal)
            except IOError as e:
                app.error(
                    "Unable to read the framework authentication key file: %s"
                    % e)
            except (KeyError, TypeError, yaml.YAMLError) as e:
                # TypeError covers a document that is not a mapping (an empty file loads as
                # None).
                app.error(
                    "Invalid framework authentication key file format %s" % e)

        log.info("Starting Mysos scheduler")

        kazoo = KazooClient(zk_servers)
        kazoo.start()

        if options.state_storage == 'zk':
            log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
            state_provider = ZooKeeperStateProvider(kazoo, zk_root)
        else:
            log.info("Using local disk for state storage")
            state_provider = LocalStateProvider(options.work_dir)

        try:
            state = state_provider.load_scheduler_state()
        except StateProvider.Error as e:
            app.error(str(e))

        if state:
            log.info("Successfully restored scheduler state")
            framework_info = state.framework_info
            if framework_info.HasField('id'):
                log.info("Recovered scheduler's FrameworkID is %s" %
                         framework_info.id.value)
        else:
            # Nothing was restored: describe a new framework and persist it right away.
            log.info("No scheduler state to restore")
            framework_info = FrameworkInfo(
                user=options.framework_user,
                name=FRAMEWORK_NAME,
                checkpoint=True,
                failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
                role=options.framework_role)
            if fw_principal:
                framework_info.principal = fw_principal
            state = Scheduler(framework_info)
            state_provider.dump_scheduler_state(state)

        scheduler = MysosScheduler(state,
                                   state_provider,
                                   options.framework_user,
                                   options.executor_uri,
                                   options.executor_cmd,
                                   kazoo,
                                   options.zk_url,
                                   election_timeout,
                                   options.admin_keypath,
                                   installer_args=options.installer_args,
                                   backup_store_args=options.backup_store_args,
                                   executor_environ=options.executor_environ,
                                   framework_role=options.framework_role)

        # The credential is passed to the driver only when both parts were loaded.
        if fw_principal and fw_secret:
            cred = Credential(principal=fw_principal, secret=fw_secret)
            scheduler_driver = mesos.native.MesosSchedulerDriver(
                scheduler, framework_info, options.mesos_master, cred)
        else:
            scheduler_driver = mesos.native.MesosSchedulerDriver(
                scheduler, framework_info, options.mesos_master)

        scheduler_driver.start()

        server = HttpServer()
        server.mount_routes(MysosServer(scheduler, web_assets_dir))

        # Serve the HTTP API from a daemon thread so it does not keep the process alive.
        et = ExceptionalThread(target=server.run,
                               args=('0.0.0.0', options.api_port, 'cherrypy'))
        et.daemon = True
        et.start()

        try:
            # Wait for the scheduler to stop.
            # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
            # process with SIGINT.
            while not scheduler.stopped.wait(timeout=0.5):
                pass
        except KeyboardInterrupt:
            log.info('Interrupted, exiting.')
        else:
            log.info('Scheduler exited.')

        app.shutdown(
            1
        )  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
Beispiel #12
0
  def main(args, options):
    """
      Run the Mysos scheduler until it stops or the process is interrupted.

      Checks the required options, restores the persisted scheduler state (or creates and persists
      a fresh one), starts the Mesos scheduler driver and the HTTP API server, then waits on the
      scheduler's 'stopped' event.

      :param args: Not used in this function.
      :param options: Option values; the attributes read are the ones named in the checks and
                      calls below.
    """
    log.info("Options in use: %s", options)

    # NOTE(review): the code below relies on app.error() not returning (names such as
    # 'election_timeout' and 'zk_servers' would otherwise be unbound) -- confirm.
    if not options.api_port:
      app.error('Must specify --port')

    if not options.mesos_master:
      app.error('Must specify --mesos_master')

    if not options.framework_user:
      app.error('Must specify --framework_user')

    if not options.executor_uri:
      app.error('Must specify --executor_uri')

    if not options.executor_cmd:
      app.error('Must specify --executor_cmd')

    if not options.zk_url:
      app.error('Must specify --zk_url')

    if not options.admin_keypath:
      app.error('Must specify --admin_keypath')

    # Exceptions are reported with str(e): the '.message' attribute was deprecated in Python 2.6
    # and removed in Python 3.
    try:
      election_timeout = parse_time(options.election_timeout)
      framework_failover_timeout = parse_time(options.framework_failover_timeout)
    except InvalidTime as e:
      app.error(str(e))

    try:
      _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
    except Exception as e:
      app.error("Invalid --zk_url: %s" % str(e))

    web_assets_dir = os.path.join(options.work_dir, "web")
    pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
    log.info("Extracted web assets into %s" % options.work_dir)

    fw_principal = None
    fw_secret = None
    if options.framework_authentication_file:
      try:
        with open(options.framework_authentication_file, "r") as f:
          # safe_load() only builds plain YAML types, which is all a principal/secret mapping
          # needs. yaml.load() without an explicit Loader can construct arbitrary Python objects
          # and is rejected by PyYAML >= 6.
          cred = yaml.safe_load(f)
        fw_principal = cred["principal"]
        fw_secret = cred["secret"]
        log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
      except IOError as e:
        app.error("Unable to read the framework authentication key file: %s" % e)
      except (KeyError, TypeError, yaml.YAMLError) as e:
        # TypeError covers a document that is not a mapping (an empty file loads as None).
        app.error("Invalid framework authentication key file format %s" % e)

    log.info("Starting Mysos scheduler")

    kazoo = KazooClient(zk_servers)
    kazoo.start()

    if options.state_storage == 'zk':
      log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
      state_provider = ZooKeeperStateProvider(kazoo, zk_root)
    else:
      log.info("Using local disk for state storage")
      state_provider = LocalStateProvider(options.work_dir)

    try:
      state = state_provider.load_scheduler_state()
    except StateProvider.Error as e:
      app.error(str(e))

    if state:
      log.info("Successfully restored scheduler state")
      framework_info = state.framework_info
      if framework_info.HasField('id'):
        log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
    else:
      # Nothing was restored: describe a new framework and persist it right away.
      log.info("No scheduler state to restore")
      framework_info = FrameworkInfo(
          user=options.framework_user,
          name=FRAMEWORK_NAME,
          checkpoint=True,
          failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
          role=options.framework_role)
      if fw_principal:
        framework_info.principal = fw_principal
      state = Scheduler(framework_info)
      state_provider.dump_scheduler_state(state)

    scheduler = MysosScheduler(
        state,
        state_provider,
        options.framework_user,
        options.executor_uri,
        options.executor_cmd,
        kazoo,
        options.zk_url,
        election_timeout,
        options.admin_keypath,
        installer_args=options.installer_args,
        backup_store_args=options.backup_store_args,
        executor_environ=options.executor_environ,
        framework_role=options.framework_role)

    # The credential is passed to the driver only when both parts were loaded.
    if fw_principal and fw_secret:
      cred = Credential(principal=fw_principal, secret=fw_secret)
      scheduler_driver = mesos.native.MesosSchedulerDriver(
          scheduler,
          framework_info,
          options.mesos_master,
          cred)
    else:
      scheduler_driver = mesos.native.MesosSchedulerDriver(
          scheduler,
          framework_info,
          options.mesos_master)

    scheduler_driver.start()

    server = HttpServer()
    server.mount_routes(MysosServer(scheduler, web_assets_dir))

    # Serve the HTTP API from a daemon thread so it does not keep the process alive.
    et = ExceptionalThread(
        target=server.run, args=('0.0.0.0', options.api_port, 'cherrypy'))
    et.daemon = True
    et.start()

    try:
      # Wait for the scheduler to stop.
      # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
      # process with SIGINT.
      while not scheduler.stopped.wait(timeout=0.5):
        pass
    except KeyboardInterrupt:
      log.info('Interrupted, exiting.')
    else:
      log.info('Scheduler exited.')

    app.shutdown(1)  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
Beispiel #13
0
  def __init__(
      self,
      driver,
      cluster,
      state_provider,
      zk_url,
      kazoo,
      framework_user,
      executor_uri,
      executor_cmd,
      election_timeout,
      admin_keypath,
      scheduler_key,
      installer_args=None,
      backup_store_args=None,
      executor_environ=None,
      framework_role='*',
      query_interval=Amount(1, Time.SECONDS)):
    """
      Set up the launcher for one MySQL cluster, picking up whatever state 'cluster' already holds.

      :param driver: Mesos scheduler driver.
      :param cluster: The MySQLCluster state object.
      :param state_provider: For restoring and persisting the cluster state.
      :param zk_url: The ZooKeeper URL for cluster member discovery and master election.
      :param kazoo: The Kazoo client to access ZooKeeper with.
      :param framework_user: Passed along to executors.
      :param executor_uri: See flags.
      :param executor_cmd: See flags.
      :param election_timeout: See flags.
      :param admin_keypath: See flags.
      :param scheduler_key: Used for encrypting cluster passwords.
      :param installer_args: See flags.
      :param backup_store_args: See flags.
      :param executor_environ: See flags.
      :param framework_role: See flags.
      :param query_interval: See MySQLMasterElector. Use the default value for production and allow
                             tests to use a different value.
      :raises TypeError: If 'cluster' or 'state_provider' has the wrong type.
    """
    self._driver = driver

    # Type-check the two state collaborators before storing them.
    if not isinstance(cluster, MySQLCluster):
      raise TypeError("'cluster' should be an instance of MySQLCluster")
    self._cluster = cluster

    if not isinstance(state_provider, StateProvider):
      raise TypeError("'state_provider' should be an instance of StateProvider")
    self._state_provider = state_provider

    self._framework_role = framework_role

    # Settings that are forwarded to executors.
    self._zk_url = zk_url
    self._framework_user = framework_user
    self._executor_uri = executor_uri
    self._executor_cmd = executor_cmd
    self._election_timeout = election_timeout
    self._admin_keypath = admin_keypath
    self._installer_args = installer_args
    self._backup_store_args = backup_store_args
    self._executor_environ = executor_environ

    # Polling interval handed to the elector.
    self._query_interval = query_interval

    # The third element of the parsed URL is the ZooKeeper root path.
    cluster_path = get_cluster_path(zookeeper.parse(zk_url)[2], cluster.name)
    self._cluster_manager = ClusterManager(kazoo, cluster_path)

    self._password_box = PasswordBox(scheduler_key)
    # Decrypting once up front validates the stored password.
    self._password_box.decrypt(cluster.encrypted_password)

    self._lock = threading.Lock()

    if self._cluster.master_id:
      republish_msg = "Republish master %s for cluster %s in case it's not published" % (
          self._cluster.master_id, self.cluster_name)
      log.info(republish_msg)
      self._cluster_manager.promote_member(self._cluster.master_id)

    if len(self._cluster.tasks) > 0:
      recovered_msg = "Recovered %s tasks for cluster '%s'" % (
          len(self._cluster.tasks), self.cluster_name)
      log.info(recovered_msg)

    # A recovered launcher should continue the election if the previous one was incomplete when the
    # scheduler failed over. Mesos will deliver all missed events that affect the election to the
    # scheduler.
    if len(self._cluster.running_tasks) == 0 or self._cluster.master_id:
      # New launcher, the elector is set when the election starts and reset to None when it ends.
      self._elector = None
    else:
      log.info("Restarting election for the recovered launcher")
      self._elector = self._new_elector()
      # Every currently running task takes part in the election as a slave.
      for running_task in self._cluster.running_tasks:
        self._elector.add_slave(running_task.task_id, running_task.mesos_slave_id)
      self._elector.start()

    self._terminating = False
Beispiel #14
0
    def __init__(self,
                 driver,
                 cluster,
                 state_provider,
                 zk_url,
                 kazoo,
                 framework_user,
                 executor_uri,
                 executor_cmd,
                 election_timeout,
                 admin_keypath,
                 scheduler_key,
                 installer_args=None,
                 backup_store_args=None,
                 executor_environ=None,
                 executor_source_prefix=None,
                 framework_role='*',
                 query_interval=Amount(1, Time.SECONDS)):
        """
        Set up the launcher for one MySQL cluster, picking up whatever state 'cluster' already
        holds.

        :param driver: Mesos scheduler driver.
        :param cluster: The MySQLCluster state object.
        :param state_provider: For restoring and persisting the cluster state.
        :param zk_url: The ZooKeeper URL for cluster member discovery and master election.
        :param kazoo: The Kazoo client to access ZooKeeper with.
        :param framework_user: Passed along to executors.
        :param executor_uri: See flags.
        :param executor_cmd: See flags.
        :param election_timeout: See flags.
        :param admin_keypath: See flags.
        :param scheduler_key: Used for encrypting cluster passwords.
        :param installer_args: See flags.
        :param backup_store_args: See flags.
        :param executor_environ: See flags.
        :param executor_source_prefix: See flags.
        :param framework_role: See flags.
        :param query_interval: See MySQLMasterElector. Use the default value for production and
                               allow tests to use a different value.
        :raises TypeError: If 'cluster' or 'state_provider' has the wrong type.
        """
        self._driver = driver

        # Type-check the two state collaborators before storing them.
        if not isinstance(cluster, MySQLCluster):
            raise TypeError("'cluster' should be an instance of MySQLCluster")
        self._cluster = cluster

        if not isinstance(state_provider, StateProvider):
            raise TypeError("'state_provider' should be an instance of StateProvider")
        self._state_provider = state_provider

        self._framework_role = framework_role

        # Settings that are forwarded to executors.
        self._zk_url = zk_url
        self._framework_user = framework_user
        self._executor_uri = executor_uri
        self._executor_cmd = executor_cmd
        self._election_timeout = election_timeout
        self._admin_keypath = admin_keypath
        self._installer_args = installer_args
        self._backup_store_args = backup_store_args
        self._executor_environ = executor_environ
        self._executor_source_prefix = executor_source_prefix

        # Polling interval handed to the elector.
        self._query_interval = query_interval

        # The third element of the parsed URL is the ZooKeeper root path.
        cluster_path = get_cluster_path(zookeeper.parse(zk_url)[2], cluster.name)
        self._cluster_manager = ClusterManager(kazoo, cluster_path)

        self._password_box = PasswordBox(scheduler_key)
        # Decrypting once up front validates the stored password.
        self._password_box.decrypt(cluster.encrypted_password)

        self._lock = threading.Lock()

        if self._cluster.master_id:
            republish_msg = "Republish master %s for cluster %s in case it's not published" % (
                self._cluster.master_id, self.cluster_name)
            log.info(republish_msg)
            self._cluster_manager.promote_member(self._cluster.master_id)

        if len(self._cluster.tasks) > 0:
            recovered_msg = "Recovered %s tasks for cluster '%s'" % (
                len(self._cluster.tasks), self.cluster_name)
            log.info(recovered_msg)

        # A recovered launcher should continue the election if the previous one was incomplete
        # when the scheduler failed over. Mesos will deliver all missed events that affect the
        # election to the scheduler.
        if len(self._cluster.running_tasks) == 0 or self._cluster.master_id:
            # New launcher, the elector is set when the election starts and reset to None when it
            # ends.
            self._elector = None
        else:
            log.info("Restarting election for the recovered launcher")
            self._elector = self._new_elector()
            # Every currently running task takes part in the election as a slave.
            for running_task in self._cluster.running_tasks:
                self._elector.add_slave(running_task.task_id, running_task.mesos_slave_id)
            self._elector.start()

        self._terminating = False