def run(self):
    """Run this task through a dedicated Mesos framework and block until done.

    Logs the task configuration, registers a checkpointing "Luigi Task"
    framework with the master at ``self.mesos_url``, runs a
    ``SimpleScheduler`` under a ``MesosSchedulerDriver`` until the driver
    returns, and finally calls ``self.on_complete()``.
    """
    log('mesos url: %s' % self.mesos_url)
    log("required resources (cpus/mem): %s/%s" % (self.resources_cpus,
                                                  self.resources_mem))
    log("docker image: %s" % (self.docker_image))

    command = self.command()
    environment = self.env_vars()
    log('cmd: %s' % command)
    log('env vars: %s' % environment)

    info = mesos_pb2.FrameworkInfo()
    info.user = ""  # Have Mesos fill in the current user.
    info.name = "Luigi Task"
    info.checkpoint = True
    info.principal = "luigi-task"

    implicit_acks = 1

    log("starting mesos driver")
    scheduler = SimpleScheduler(self.docker_image, command,
                                self.resources_cpus, self.resources_mem,
                                environment)
    driver = mesos.native.MesosSchedulerDriver(
        scheduler, info, self.mesos_url, implicit_acks)

    # 0 only when the driver reports a clean stop, 1 for anything else.
    if driver.run() == mesos_pb2.DRIVER_STOPPED:
        status = 0
    else:
        status = 1
    log("driver stoped with status: %s" % status)

    # Ensure that the driver process terminates.
    driver.stop()
    self.on_complete()
def __init__(self, options, command):
    """Initialise scheduler state and the ``FrameworkInfo`` for an mrun job.

    :param options: parsed options; ``cpus``, ``mem``, ``start`` and
        ``tasks`` are read here and the object is kept on ``self.options``.
    :param command: stored unchanged on ``self.command``.
    :raises Exception: when the current user is ``root``.
    """
    self.framework_id = None
    self.executor = None

    info = self.framework = mesos_pb2.FrameworkInfo()
    info.user = getuser()
    if info.user == 'root':
        raise Exception("mrun is not allowed to run as 'root'")

    # Framework name is the command line, cut to 256 chars plus an ellipsis.
    title = '[mrun] ' + ' '.join(sys.argv[1:])
    info.name = title if len(title) <= 256 else title[:256] + '...'
    info.hostname = socket.gethostname()

    self.cpus = options.cpus
    self.mem = parse_mem(options.mem)
    self.options = options
    self.command = command

    # Tasks are created in ascending id order and stored reversed.
    pending = [Task(index) for index in range(options.start, options.tasks)]
    pending.reverse()
    self.total_tasks = pending

    self.task_launched = {}
    self.slaveTasks = {}
    self.started = False
    self.stopped = False
    self.status = 0
    self.next_try = 0
    self.lock = threading.RLock()
    self.last_offer_time = time.time()
def start(self):
    """Register with Mesos and wait for every task to call back.

    Opens a listening socket on an OS-assigned port, publishes its address
    as ``self.addr``, starts the scheduler driver and then accepts
    connections until every entry of ``self.tasks`` is marked initialised.
    Returns the result of ``self._start_tf_cluster()``.

    On any ``Exception`` the scheduler is stopped and the error re-raised;
    the listening socket is always closed.
    """
    def readable(fd):
        # Poll with a 0.1s timeout so the outer loop keeps re-checking tasks.
        ready, _, _ = select.select([fd], [], [], 0.1)
        return bool(ready)

    listener = socket.socket()
    try:
        listener.bind(('', 0))
        self.addr = '%s:%s' % (socket.gethostname(),
                               listener.getsockname()[1])
        listener.listen(10)

        info = mesos_pb2.FrameworkInfo()
        info.user = getpass.getuser()
        info.name = self.name
        info.hostname = socket.gethostname()

        self.driver = MesosSchedulerDriver(self, info, self.master)
        self.driver.start()

        while not all(task.initalized for task in self.tasks):
            if not readable(listener):
                continue

            conn, _ = listener.accept()
            if not readable(conn):
                # Peer connected but sent nothing in time: drop it.
                conn.close()
                continue

            mesos_task_id, addr = recv(conn)
            assert isinstance(mesos_task_id, int)
            task = self.tasks[mesos_task_id]
            task.addr = addr
            task.connection = conn
            task.initalized = True

        return self._start_tf_cluster()
    except Exception:
        self.stop()
        raise
    finally:
        listener.close()
def launch_framework():
    """
    Register the Calico framework with the Mesos master and start its driver.

    A framework ID previously stored in ZooKeeper is reused when present.

    :return: The started Mesos driver. The caller should call driver.join()
    to keep its thread blocked until the driver exits.
    """
    master = config.mesos_master
    _log.info("Connecting to Master: %s", master)

    info = mesos_pb2.FrameworkInfo()
    info.user = "******"
    info.name = "calico"
    info.principal = "calico"
    info.failover_timeout = 604800
    info.role = "slave_public"
    info.webui_url = config.webserver_url

    previous_id = zk.get_framework_id()
    if previous_id:
        _log.info("Using old framework ID: %s", previous_id)
        info.id.value = previous_id

    _log.info("Launching Calico Mesos scheduler")
    driver = mesos.native.MesosSchedulerDriver(
        CalicoInstallerScheduler(), info, config.mesos_master)
    driver.start()
    return driver
def create_driver(framework_name, scheduler, system_paasta_config,
                  implicit_acks=False):
    """Build (but do not start) an authenticated ``MesosSchedulerDriver``.

    :param framework_name: name registered with Mesos; also passed to
        ``find_existing_id_if_exists_or_gen_new`` to obtain the framework ID.
    :param scheduler: scheduler object handed to the driver.
    :param system_paasta_config: object whose ``get_paasta_native_config()``
        result provides the ``'principal'`` and ``'secret'`` entries.
    :param implicit_acks: forwarded to the driver as its acknowledgement flag.
    :return: the constructed driver.
    """
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = framework_name
    framework.failover_timeout = 604800
    framework.id.value = find_existing_id_if_exists_or_gen_new(framework.name)
    framework.checkpoint = True

    # Read the native config once so the credential and the framework are
    # guaranteed to carry the same principal.
    native_config = system_paasta_config.get_paasta_native_config()
    principal = native_config['principal']

    credential = mesos_pb2.Credential()
    credential.principal = principal
    credential.secret = native_config['secret']

    framework.principal = principal

    master = '%s:%d' % (mesos_tools.get_mesos_leader(),
                        mesos_tools.MESOS_MASTER_PORT)
    return MesosSchedulerDriver(
        scheduler, framework, master, implicit_acks, credential)
def run(self):
    """Run this task through a dedicated Mesos framework and block until done.

    Logs the task configuration, registers a checkpointing "Luigi Task"
    framework with the master at ``self.mesos_url``, runs a
    ``SimpleScheduler`` under a ``MesosSchedulerDriver`` until the driver
    returns, and finally calls ``self.on_complete()``.
    """
    logger.info("mesos url: {}".format(self.mesos_url))
    logger.info("required resources (cpus/mem): {}/{}".format(
        self.resources_cpus, self.resources_mem))
    logger.info("docker image: {}".format(self.docker_image))
    logger.info("cmd: {}".format(self.docker_command))
    logger.info("env vars: {}".format(dict(self.env_vars)))

    info = mesos_pb2.FrameworkInfo()
    info.user = ""  # Have Mesos fill in the current user.
    info.name = "Luigi Task"
    info.checkpoint = True
    info.principal = "luigi-task"

    implicit_acks = 1

    logger.info("starting mesos driver")
    scheduler = SimpleScheduler(self.docker_image, self.docker_command,
                                self.resources_cpus, self.resources_mem,
                                self.env_vars)
    driver = mesos.native.MesosSchedulerDriver(
        scheduler, info, self.mesos_url, implicit_acks)

    # 0 only when the driver reports a clean stop, 1 for anything else.
    if driver.run() == mesos_pb2.DRIVER_STOPPED:
        status = 0
    else:
        status = 1
    logger.info("driver stoped with status: {}".format(status))

    # Ensure that the driver process terminates.
    driver.stop()
    self.on_complete()
def start_driver(self):
    """Start the Mesos scheduler driver and an idle watchdog.

    The framework is named after the running script and its arguments.
    After the driver is started, a background ``check`` loop (launched via
    ``spawn``) stops the scheduler once there have been no active jobs for
    more than ``MAX_IDLE_TIME`` seconds.

    :raises Exception: when the current user is ``root``.
    """
    script = os.path.abspath(sys.argv[0])
    arguments = ' '.join(sys.argv[1:])
    title = '[dpark] ' + script + ' ' + arguments
    if len(title) > 256:
        title = title[:256] + '...'

    info = mesos_pb2.FrameworkInfo()
    info.user = getuser()
    if info.user == 'root':
        raise Exception("dpark is not allowed to run as 'root'")
    info.name = title
    info.hostname = socket.gethostname()

    self.driver = mesos.MesosSchedulerDriver(self, info, self.master)
    self.driver.start()
    logger.debug("Mesos Scheudler driver started")

    self.started = True
    self.last_finish_time = time.time()

    def check():
        # Poll once per second for as long as the scheduler is running.
        while self.started:
            now = time.time()
            if (not self.activeJobs
                    and now - self.last_finish_time > MAX_IDLE_TIME):
                logger.info("stop mesos scheduler after %d seconds idle",
                            now - self.last_finish_time)
                self.stop()
                break
            time.sleep(1)

    spawn(check)
def setUp(self):
    """Create the fakes, a sample offer and the scheduler state under test."""
    self._driver = FakeDriver()
    self._storage = FakeStorage(SequentialThreadingHandler())
    self._zk_client = FakeClient(storage=self._storage)
    self._zk_client.start()

    self._framework_id = mesos_pb2.FrameworkID()
    self._framework_id.value = "framework_id_0"

    # A single offer from "localhost" carrying 4 cpus, 1536 mem and 3 ports.
    offer = mesos_pb2.Offer()
    self._offer = offer
    offer.id.value = "offer_id_0"
    offer.framework_id.value = self._framework_id.value
    offer.slave_id.value = "slave_id_0"
    offer.hostname = "localhost"
    offered = create_resources(
        cpus=4, mem=512 * 3, ports={10000, 10001, 10002})
    offer.resources.extend(offered)

    self._framework_user = "******"
    self._zk_url = "zk://host/mysos/test"
    self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

    self._tmpdir = tempfile.mkdtemp()
    self._state_provider = LocalStateProvider(self._tmpdir)

    info = mesos_pb2.FrameworkInfo(
        user=getpass.getuser(), name="mysos", checkpoint=False)
    self._state = Scheduler(info)
def start_driver(self):
    """Register an ``OpenCluster-<suffix>`` framework and start its driver.

    The suffix is ``self.options.name`` when set, otherwise a timestamp of
    the current time. Resets the ``shuttingdown``/``stopped`` flags and
    ``last_finish_time`` after the driver has been started.

    :raises Exception: when the current user is ``root``.
    """
    suffix = self.options.name or datetime.datetime.now().strftime(
        "%Y%m%d%H%M%S%f")
    title = "%s-%s" % ('OpenCluster', suffix)
    if len(title) > 256:
        title = title[:256] + '...'

    info = mesos_pb2.FrameworkInfo()
    info.user = getuser()
    if info.user == 'root':
        raise Exception("OpenCluster is not allowed to run as 'root'")
    info.name = title
    info.hostname = socket.gethostname()

    self.driver = MesosSchedulerDriver(self, info, self.master)
    self.driver.start()
    logger.debug("Mesos Scheudler driver started")

    self.shuttingdown = False
    self.last_finish_time = time.time()
    self.stopped = False
def test_decode_framework_info():
    """decode() wraps a FrameworkInfo and its nested id in proxy types."""
    framework_id = mesos_pb2.FrameworkID(value='test')
    raw = mesos_pb2.FrameworkInfo(id=framework_id)

    wrapped = decode(raw)

    # The top-level message is proxied as the matching wrapper class...
    assert isinstance(wrapped, MessageProxy)
    assert isinstance(wrapped, FrameworkInfo)
    # ...and so is the nested FrameworkID field.
    assert isinstance(wrapped.id, MessageProxy)
    assert isinstance(wrapped.id, FrameworkID)
def main(master):
    """Run the example framework against ``master`` until the driver stops.

    Builds the executor and framework descriptions, optionally enables
    authentication (when ``EXAMPLE_AUTHENTICATE`` is set, using
    ``EXAMPLE_PRINCIPAL`` / ``EXAMPLE_SECRET``), runs the driver in a
    background thread and installs a SIGINT handler that stops it.

    The process exits with 0 when ``driver.run()`` returned
    ``DRIVER_STOPPED`` and with 1 otherwise (including when the driver
    thread died before reporting a status).
    """
    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s %(levelname)s] %(message)s')

    # Create a new executor
    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = 'ExampleExecutor'
    executor.name = executor.executor_id.value
    executor.command.value = os.path.abspath('./executor-skeleton.py')

    # Create a new framework
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ''  # the current user
    framework.name = 'ExampleFramework'
    framework.checkpoint = True

    implicitAcknowledgements = 1

    if os.getenv('EXAMPLE_AUTHENTICATE'):
        logging.info('Enabling framework authentication')

        credential = mesos_pb2.Credential()
        credential.principal = os.getenv('EXAMPLE_PRINCIPAL')
        credential.secret = os.getenv('EXAMPLE_SECRET')
        framework.principal = os.getenv('EXAMPLE_PRINCIPAL')

        driver = MesosSchedulerDriver(ExampleScheduler(executor), framework,
                                      master, implicitAcknowledgements,
                                      credential)
    else:
        framework.principal = framework.name

        driver = MesosSchedulerDriver(ExampleScheduler(executor), framework,
                                      master, implicitAcknowledgements)

    # Filled in by the driver thread. sys.exit() called from a non-main
    # thread only terminates that thread, so the status has to be handed
    # back to the main thread explicitly.
    exit_status = []

    def signal_handler(signum, frame):
        logging.info('Shutting down')
        driver.stop()

    # driver.run() blocks, so we run it in a separate thread.
    # This way, we can catch a SIGINT to kill the framework.
    def run_driver_thread():
        status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1
        driver.stop()  # Ensure the driver process terminates
        exit_status.append(status)

    driver_thread = Thread(target=run_driver_thread, args=())
    driver_thread.start()

    logging.info('Scheduler running, Ctrl-C to exit')
    signal.signal(signal.SIGINT, signal_handler)

    # Block the main thread while the driver thread is alive
    while driver_thread.is_alive():
        time.sleep(1)

    logging.info('Framework finished.')
    # No recorded status means the driver thread ended abnormally.
    sys.exit(exit_status[0] if exit_status else 1)
def run_scheduler(self, mesos_master):
    """Run the Scale scheduler against ``mesos_master`` and exit the process.

    Initialises the scheduler, describes the framework (name and web UI
    address come from the environment), optionally enables checkpointing
    and authentication, runs the driver until it returns, performs the
    shutdown and calls ``sys.exit`` with the combined status.

    Exits with status 1 early when authentication is enabled but the
    principal or the secret is missing.
    """
    logger.info("I am the leader")
    self.scheduler = ScaleScheduler()
    self.scheduler.initialize()
    scheduler_mgr.hostname = socket.getfqdn()

    info = mesos_pb2.FrameworkInfo()
    info.user = ''  # Have Mesos fill in the current user.
    info.name = os.getenv('DCOS_PACKAGE_FRAMEWORK_NAME', 'Scale')
    webui = os.getenv('SCALE_WEBSERVER_ADDRESS')
    if webui:
        info.webui_url = webui

    logger.info('Connecting to Mesos master at %s', mesos_master)

    # TODO(vinod): Make checkpointing the default when it is default on the slave.
    if MESOS_CHECKPOINT:
        logger.info('Enabling checkpoint for the framework')
        info.checkpoint = True

    # Positional arguments for the driver; a credential is appended below
    # only when authentication is enabled.
    driver_args = [self.scheduler, info, mesos_master]
    if MESOS_AUTHENTICATE:
        logger.info('Enabling authentication for the framework')

        if not DEFAULT_PRINCIPLE:
            logger.error(
                'Expecting authentication principal in the environment')
            sys.exit(1)

        if not DEFAULT_SECRET:
            logger.error(
                'Expecting authentication secret in the environment')
            sys.exit(1)

        credential = mesos_pb2.Credential()
        credential.principal = DEFAULT_PRINCIPLE
        credential.secret = DEFAULT_SECRET
        driver_args.append(credential)

    self.driver = MesosSchedulerDriver(*driver_args)

    try:
        if self.driver.run() == mesos_pb2.DRIVER_STOPPED:
            status = 0
        else:
            status = 1
    except:
        status = 1
        logger.exception('Mesos Scheduler Driver returned an exception')

    # Perform a shut down and return any non-zero status
    shutdown_status = self._shutdown()
    status = status or shutdown_status

    logger.info('Exiting...')
    sys.exit(status)
def mesos_framework(cfg):
    """Build the ``FrameworkInfo`` message from ``cfg.mesos``.

    Only ``user`` and ``framework_name`` are copied from the configuration.

    :param cfg: configuration object exposing ``mesos.user`` and
        ``mesos.framework_name``.
    :return: the populated ``FrameworkInfo``.
    """
    info = MesosPb2.FrameworkInfo()
    info.user = cfg.mesos.user
    info.name = cfg.mesos.framework_name
    # NOTE: principal and role are not set here
    # (cfg.mesos.principal / cfg.mesos.role are left unused).
    return info
def test_registered(self):
    """registered() stores the driver and all three info messages."""
    driver = ExecutorDriver()
    executor_info = mesos_pb2.ExecutorInfo()
    framework_info = mesos_pb2.FrameworkInfo()
    slave_info = mesos_pb2.SlaveInfo()

    base = self.executor_base
    base.registered(driver, executor_info, framework_info, slave_info)

    assert self.executor_base._driver == driver
    assert self.executor_base._executor_info == executor_info
    assert self.executor_base._framework_info == framework_info
    assert self.executor_base._slave_info == slave_info
def start_framework(master_uri, exe_path, cpu_alloc, mem_alloc,
                    refuse_seconds):
    """Build the QoSon scheduler and its (not yet started) Mesos driver.

    Behaviour is tuned through environment variables:

    * ``MESOS_EXPLICIT_ACKNOWLEDGEMENTS`` - disable implicit status update
      acknowledgements.
    * ``MESOS_CHECKPOINT`` - enable framework checkpointing.
    * ``MESOS_AUTHENTICATE`` - authenticate with ``DEFAULT_PRINCIPAL`` and
      ``DEFAULT_SECRET``; the process exits with status 1 when either one
      is missing.

    :param master_uri: address of the Mesos master passed to the driver.
    :param exe_path: command value for the executor.
    :param cpu_alloc: forwarded to ``NetmonScheduler``.
    :param mem_alloc: forwarded to ``NetmonScheduler``.
    :param refuse_seconds: forwarded to ``NetmonScheduler``.
    :return: ``(driver, sched)`` tuple.
    """
    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "default"
    executor.command.value = exe_path
    executor.name = "QoSon Executor (Python)"
    executor.source = "network monitoring"

    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = "QoSon Framework (Python)"

    # print(...) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    implicitAcknowledgements = 1
    if os.getenv("MESOS_EXPLICIT_ACKNOWLEDGEMENTS"):
        print("Enabling explicit status update acknowledgements")
        implicitAcknowledgements = 0

    # create a scheduler and capture the command line options
    sched = NetmonScheduler(implicitAcknowledgements, executor, cpu_alloc,
                            mem_alloc, refuse_seconds)

    if os.getenv("MESOS_CHECKPOINT"):
        print("Enabling checkpoint for the framework")
        framework.checkpoint = True

    if os.getenv("MESOS_AUTHENTICATE"):
        print("Enabling authentication for the framework")

        principal = os.getenv("DEFAULT_PRINCIPAL")
        if not principal:
            print("Expecting authentication principal in the environment")
            sys.exit(1)

        secret = os.getenv("DEFAULT_SECRET")
        if not secret:
            print("Expecting authentication secret in the environment")
            sys.exit(1)

        credential = mesos_pb2.Credential()
        credential.principal = principal
        credential.secret = secret

        framework.principal = principal

        driver = mesos.native.MesosSchedulerDriver(
            sched, framework, master_uri, implicitAcknowledgements,
            credential)
    else:
        framework.principal = "test-framework-python"

        driver = mesos.native.MesosSchedulerDriver(
            sched, framework, master_uri, implicitAcknowledgements)

    return driver, sched
def _startDriver(self):
    """
    The Mesos driver thread which handles the scheduler's communication with the Mesos master

    Registers a framework named "toil" (used as its own principal) with the
    master at ``self._resolveAddress(self.mesosMasterAddress)`` and starts
    the driver with implicit acknowledgements enabled.
    """
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = "toil"
    framework.principal = framework.name

    self.driver = mesos.native.MesosSchedulerDriver(
        self,
        framework,
        self._resolveAddress(self.mesosMasterAddress),
        True)  # enable implicit acknowledgements

    # Start the driver outside of the assert statement: asserts are removed
    # when Python runs with -O, which would otherwise skip the start() call.
    status = self.driver.start()
    assert status == mesos_pb2.DRIVER_RUNNING, \
        "Mesos driver failed to start (status %r)" % (status,)
def __init__(self, scheduler, config):
    """Prepare the Mesos scheduler wrapper and its driver.

    When ``self.config`` has no ``"mesos"`` entry an error is logged and
    initialisation stops early, leaving the Mesos attributes unset.

    :param scheduler: passed to the base class and to ``NebularMesos``.
    :param config: configuration; ``config['mesos']`` is handed to the
        driver as the master address.
    """
    super(MesosDRMS, self).__init__(scheduler, config)
    if "mesos" not in self.config:
        logging.error("Mesos not configured")
        return

    self.driver_thread = None
    self.sched = NebularMesos(scheduler, config)

    info = mesos_pb2.FrameworkInfo()
    info.user = ""  # Have Mesos fill in the current user.
    info.name = "Nebula"
    self.framework = info
    ## additional authentication stuff would go here

    self.driver = mesos.native.MesosSchedulerDriver(
        self.sched, self.framework, self.config['mesos'])
def setUp(self):
    """Create the fake Mesos protobuf messages shared by the tests."""
    framework_id = mesos_pb2.FrameworkID(value=self.FRAMEWORK_ID)
    self.framework_id = framework_id
    self.framework_info = mesos_pb2.FrameworkInfo(
        user='******',
        name='fake_framework_name',
    )

    command_info = mesos_pb2.CommandInfo(value='fake-command')
    executor_id = mesos_pb2.ExecutorID(value='fake-executor-id')
    self.command_info = command_info
    self.executor_id = executor_id
    # The executor message ties together the ids and command built above.
    self.executor_info = mesos_pb2.ExecutorInfo(
        executor_id=executor_id,
        framework_id=framework_id,
        command=command_info,
    )

    self.slave_id = mesos_pb2.SlaveID(value='fake-slave-id')
    self.offer_id = mesos_pb2.OfferID(value='1')
def run_scheduler(self, mesos_master):
    """Run the Scale scheduler against ``mesos_master`` and exit the process.

    Describes the framework, optionally enables checkpointing and
    authentication, runs the driver until it returns, performs the
    clean-up in ``self._shutdown()`` and calls ``sys.exit`` with the
    combined status (the driver status, or the shutdown status when the
    driver stopped cleanly).

    Exits with status 1 early when authentication is enabled but the
    principal or the secret is missing.
    """
    logger.info("I am the leader")
    self.scheduler = ScaleScheduler()

    framework = mesos_pb2.FrameworkInfo()
    framework.user = ''  # Have Mesos fill in the current user.
    framework.name = 'Scale'

    logger.info('Connecting to Mesos master at %s', mesos_master)

    # TODO(vinod): Make checkpointing the default when it is default on the slave.
    if MESOS_CHECKPOINT:
        logger.info('Enabling checkpoint for the framework')
        framework.checkpoint = True

    if MESOS_AUTHENTICATE:
        logger.info('Enabling authentication for the framework')

        if not DEFAULT_PRINCIPLE:
            logger.error(
                'Expecting authentication principal in the environment')
            sys.exit(1)

        if not DEFAULT_SECRET:
            logger.error(
                'Expecting authentication secret in the environment')
            sys.exit(1)

        credential = mesos_pb2.Credential()
        credential.principal = DEFAULT_PRINCIPLE
        credential.secret = DEFAULT_SECRET

        self.driver = MesosSchedulerDriver(self.scheduler, framework,
                                           mesos_master, credential)
    else:
        self.driver = MesosSchedulerDriver(self.scheduler, framework,
                                           mesos_master)

    status = 0 if self.driver.run() == mesos_pb2.DRIVER_STOPPED else 1

    # Perform any required clean up operations like stopping background threads.
    # Call _shutdown() unconditionally: `status or self._shutdown()` would
    # short-circuit and skip the clean-up whenever the driver failed.
    shutdown_status = self._shutdown()
    status = status or shutdown_status

    logger.info('Exiting...')
    sys.exit(status)
def __init__(self, options, command, implicitAcknowledgements):
    """Initialise scheduler state, the framework info and the output sink.

    ``options.warehouse_addr`` is a comma-separated string. With four or
    more entries it is read as ``host:port,db,user,passwd`` and a MySQL
    connection is opened; otherwise the whole string is handed to
    ``KafkaClient`` and a ``SimpleProducer`` is created.

    No root-user check is performed here.

    :param options: parsed options; ``warehouse_addr`` is read here and the
        object is kept on ``self.options``.
    :param command: stored unchanged on ``self.command``.
    :param implicitAcknowledgements: stored unchanged on the instance.
    """
    self.framework_id = None
    self.executor = None
    self.implicitAcknowledgements = implicitAcknowledgements

    self.framework = mesos_pb2.FrameworkInfo()
    self.framework.user = getuser()
    self.framework.name = 'OpenCluster-Factory'
    self.framework.hostname = socket.gethostname()

    self.options = options
    self.command = command

    self.tasks_waiting = {}
    self.task_launched = {}
    self.slaveTasks = {}
    self.stopped = False
    self.status = 0
    self.last_offer_time = time.time()
    self.lock = threading.RLock()
    self.priority_size = 4

    self.warehouse_addrs = self.options.warehouse_addr.split(",")
    self.outputdb = None
    self.kafka_client = None
    self.producer = None

    # The MySQL form needs all four fields (index 3 is the password), so
    # require at least four entries; the previous `> 2` test let a
    # three-entry address through and failed with an IndexError.
    if len(self.warehouse_addrs) >= 4:
        mysqlIpAndPort = self.warehouse_addrs[0].split(":")
        self.outputdb = MySQLdb.connect(host=mysqlIpAndPort[0],
                                        port=int(mysqlIpAndPort[1]),
                                        db=self.warehouse_addrs[1],
                                        user=self.warehouse_addrs[2],
                                        passwd=self.warehouse_addrs[3])
    else:
        self.kafka_client = KafkaClient(self.options.warehouse_addr)
        self.producer = SimpleProducer(self.kafka_client)

    # One waiting list per priority level, keyed 1..priority_size.
    for priority in range(1, self.priority_size + 1):
        self.tasks_waiting[priority] = []
def _startDriver(self):
    """
    The Mesos driver thread which handles the scheduler's communication with the Mesos master

    Registers a framework named "toil" with the master at ``self.masterIP``
    and starts the driver. Checkpointing is enabled when the
    ``MESOS_CHECKPOINT`` environment variable is set.

    :raises NotImplementedError: when ``MESOS_AUTHENTICATE`` is set.
    """
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ""  # Have Mesos fill in the current user.
    framework.name = "toil"

    if os.getenv("MESOS_CHECKPOINT"):
        log.debug("Enabling checkpoint for the framework")
        framework.checkpoint = True

    if os.getenv("MESOS_AUTHENTICATE"):
        raise NotImplementedError(
            "Authentication is currently not supported")
    else:
        framework.principal = framework.name
        self.driver = mesos.native.MesosSchedulerDriver(
            self, framework, self.masterIP, self.implicitAcknowledgements)

    # Start the driver outside of the assert statement: asserts are removed
    # when Python runs with -O, which would otherwise skip the start() call.
    status = self.driver.start()
    assert status == mesos_pb2.DRIVER_RUNNING, \
        "Mesos driver failed to start (status %r)" % (status,)
def init_mesos_scheduler(ns, MV, exception_sender, mesos_ready):
    """Run the Relay.Mesos scheduler driver and exit the process when it ends.

    Imports the Mesos bindings lazily (logging and re-raising the
    ``ImportError`` when the native bindings are missing), builds the
    framework description from ``ns``, runs the driver until it returns
    and calls ``sys.exit`` with 0 for a clean stop and 1 otherwise.

    :param ns: options namespace; the ``mesos_*`` attributes are read here.
    :param MV: forwarded to ``Scheduler``.
    :param exception_sender: forwarded to ``Scheduler``.
    :param mesos_ready: forwarded to ``Scheduler``.
    """
    import mesos.interface
    from mesos.interface import mesos_pb2

    log_extra = dict(mesos_framework_name=ns.mesos_framework_name)
    try:
        import mesos.native
    except ImportError:
        log.error(
            "Oops! Mesos native bindings are not installed. You can download"
            " these binaries from mesosphere.",
            extra=log_extra)
        raise

    log.info('starting mesos scheduler', extra=log_extra)

    # build framework
    info = mesos_pb2.FrameworkInfo()
    info.user = ""  # Have Mesos fill in the current user.
    info.name = "Relay.Mesos: %s" % ns.mesos_framework_name
    if ns.mesos_framework_principal:
        info.principal = ns.mesos_framework_principal
    if ns.mesos_framework_role:
        info.role = ns.mesos_framework_role
    if ns.mesos_checkpoint:
        info.checkpoint = True

    # build driver
    scheduler = Scheduler(
        MV=MV, exception_sender=exception_sender, mesos_ready=mesos_ready,
        ns=ns)
    driver = mesos.native.MesosSchedulerDriver(
        scheduler, info, ns.mesos_master)
    atexit.register(driver.stop)

    # run things
    if driver.run() == mesos_pb2.DRIVER_STOPPED:
        status = 0
    else:
        status = 1
    driver.stop()  # Ensure that the driver process terminates.
    sys.exit(status)
def main(master):
    """Run the minimal framework against ``master`` until the driver stops.

    The driver runs in a background thread while the main thread installs
    a SIGINT handler that stops it. The process exits with 0 when
    ``driver.run()`` returned ``DRIVER_STOPPED`` and with 1 otherwise
    (including when the driver thread died before reporting a status).
    """
    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = 'MinimalExecutor'
    executor.name = executor.executor_id.value
    executor.command.value = os.path.abspath('./executor-minimal.py')

    framework = mesos_pb2.FrameworkInfo()
    framework.user = ''  # the current user
    framework.name = 'MinimalFramework'
    framework.checkpoint = True
    framework.principal = framework.name

    implicitAcknowledgements = 1

    driver = MesosSchedulerDriver(
        MinimalScheduler(executor),
        framework,
        master,
        implicitAcknowledgements
    )

    # Filled in by the driver thread. sys.exit() called from a non-main
    # thread only terminates that thread, so the status has to be handed
    # back to the main thread explicitly.
    exit_status = []

    def signal_handler(signum, frame):
        driver.stop()

    def run_driver_thread():
        status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1
        driver.stop()
        exit_status.append(status)

    driver_thread = Thread(target=run_driver_thread, args=())
    driver_thread.start()

    print('Scheduler running, Ctrl-C to quit.')
    signal.signal(signal.SIGINT, signal_handler)

    while driver_thread.is_alive():
        time.sleep(1)

    # No recorded status means the driver thread ended abnormally.
    sys.exit(exit_status[0] if exit_status else 1)
def __init__(self, name, options, command):
    """Initialise scheduler state and the ``FrameworkInfo`` for a drun job.

    :param name: framework name registered with Mesos.
    :param options: parsed options; ``cpus`` and ``mem`` are read here and
        the object is kept on ``self.options``.
    :param command: stored unchanged on ``self.command``.
    :raises Exception: when the current user is ``root``.
    """
    self.framework_id = None
    self.executor = None

    info = self.framework = mesos_pb2.FrameworkInfo()
    info.user = getuser()
    if info.user == 'root':
        raise Exception("drun is not allowed to run as 'root'")
    info.name = name
    info.hostname = socket.gethostname()

    # Resource requirements; memory is converted from its string form.
    self.cpus = options.cpus
    self.mem = memory_str_to_mb(options.mem)
    self.options = options
    self.command = command

    # Lifecycle flags and bookkeeping.
    self.started = False
    self.stopped = False
    self.status = 0
    self.next_try = 0
    self.lock = threading.RLock()
    self.last_offer_time = time.time()
    self.task_launched = {}
    self.slaveTasks = {}
def test_executor_event_handlers(mocker):
    """Every ExecutorProxy callback is forwarded to its ``on_*`` handler once."""
    executor = mocker.Mock()
    driver = mocker.Mock()
    proxy = ExecutorProxy(executor)

    # Fire each driver callback exactly once.
    proxy.registered(driver, mesos_pb2.ExecutorInfo(),
                     mesos_pb2.FrameworkInfo(), mesos_pb2.SlaveInfo())
    proxy.reregistered(driver, mesos_pb2.SlaveInfo())
    proxy.disconnected(driver)
    proxy.launchTask(driver, mesos_pb2.TaskInfo())
    proxy.killTask(driver, mesos_pb2.TaskID())
    proxy.frameworkMessage(driver, 'message')
    proxy.shutdown(driver)
    proxy.error(driver, 'message')

    # Handlers are checked in the same order the callbacks were fired.
    expected_handlers = (
        'on_registered',
        'on_reregistered',
        'on_disconnected',
        'on_launch',
        'on_kill',
        'on_message',
        'on_shutdown',
        'on_error',
    )
    for handler_name in expected_handlers:
        getattr(executor, handler_name).assert_called_once()
def main(args):
    """Run the test case repeater framework and return its exit status.

    :param args: parsed arguments; ``database``, ``batch`` and
        ``controller`` are read here.
    :return: 0 when the driver stopped cleanly, 1 otherwise.
    """
    # Get the path to our executor
    executor_path = os.path.abspath("./executor")

    # Describe the executor and register the framework with Mesos.
    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "test-case-executor"
    executor.command.value = executor_path
    executor.name = "Test case repeater"

    # Scalar resources attached to the executor: 0 cpus and 128 mem.
    for resource_name, amount in (("cpus", 0), ("mem", 128)):
        resource = executor.resources.add()
        resource.name = resource_name
        resource.type = mesos_pb2.Value.SCALAR
        resource.scalar.value = amount

    #executor.container.MergeFrom(container)

    info = mesos_pb2.FrameworkInfo()
    info.user = ""  # Have Mesos fill in the current user.
    info.name = "Test case repeater framework"
    info.checkpoint = True
    info.principal = "test-case-repeater"

    scheduler = TestCaseScheduler(args.database, executor, args.batch)
    driver = MesosSchedulerDriver(scheduler, info, args.controller, 1)

    return 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1
"using offer %s.", task.task_id.value, offer.id.value) driver.launchTasks(offer.id, [task]) def statusUpdate(self, driver, update): ''' when a task is started, over, killed or lost (slave crash, ....), this method will be triggered with a status message. ''' logging.info("Task %s is in state %s" % (update.task_id.value, mesos_pb2.TaskState.Name(update.state))) if __name__ == '__main__': framework = mesos_pb2.FrameworkInfo() framework.user = "" # Have Mesos fill in the current user. framework.name = "hello-world" driver = MesosSchedulerDriver( HelloWorldScheduler(), framework, "zk://localhost:2181/mesos" # assumes running on the master ) driver.start() logging.info("Listening for Ctrl-C") signal.signal(signal.SIGINT, shutdown) while True: time.sleep(5) sys.exit(0)
def start(self):
    """Configure and start the Mesos scheduler driver for this executor.

    Creates the task and result queues, reads the ``mesos`` section of the
    configuration (master URL, per-task cpu/memory, checkpointing, failover
    timeout, authentication), builds the ``FrameworkInfo`` and starts a
    ``MesosSchedulerDriver`` around an ``AirflowMesosScheduler``. The
    driver is stored on ``self.mesos_driver``.

    :raises AirflowException: when the master URL is missing, or when
        authentication is enabled without a principal or a secret.
    """
    self.task_queue = Queue()
    self.result_queue = Queue()
    framework = mesos_pb2.FrameworkInfo()
    framework.user = ''

    if not configuration.get('mesos', 'MASTER'):
        self.log.error("Expecting mesos master URL for mesos executor")
        raise AirflowException(
            "mesos.master not provided for mesos executor")

    master = configuration.get('mesos', 'MASTER')

    framework.name = get_framework_name()

    # Per-task resources fall back to 1 cpu / 256 memory when not configured.
    if not configuration.get('mesos', 'TASK_CPU'):
        task_cpu = 1
    else:
        task_cpu = configuration.getint('mesos', 'TASK_CPU')

    if not configuration.get('mesos', 'TASK_MEMORY'):
        task_memory = 256
    else:
        task_memory = configuration.getint('mesos', 'TASK_MEMORY')

    # NOTE(review): the nesting below (failover handling inside the
    # checkpoint branch, `else` paired with the CHECKPOINT test) is
    # reconstructed from a whitespace-collapsed source - confirm against
    # the original file.
    if configuration.getboolean('mesos', 'CHECKPOINT'):
        framework.checkpoint = True

        if configuration.get('mesos', 'FAILOVER_TIMEOUT'):
            # Import here to work around a circular import error
            from airflow.models import Connection

            # Query the database to get the ID of the Mesos Framework, if available.
            conn_id = FRAMEWORK_CONNID_PREFIX + framework.name
            # NOTE(review): this session is not closed anywhere in this
            # method - confirm whether that is intended.
            session = Session()
            connection = session.query(Connection).filter_by(
                conn_id=conn_id).first()
            if connection is not None:
                # Set the Framework ID to let the scheduler reconnect with running tasks.
                framework.id.value = connection.extra

            framework.failover_timeout = configuration.getint(
                'mesos', 'FAILOVER_TIMEOUT')
    else:
        framework.checkpoint = False

    self.log.info(
        'MesosFramework master : %s, name : %s, cpu : %s, mem : %s, checkpoint : %s',
        master, framework.name, str(task_cpu), str(task_memory),
        str(framework.checkpoint))

    implicit_acknowledgements = 1

    if configuration.getboolean('mesos', 'AUTHENTICATE'):
        if not configuration.get('mesos', 'DEFAULT_PRINCIPAL'):
            self.log.error(
                "Expecting authentication principal in the environment")
            raise AirflowException(
                "mesos.default_principal not provided in authenticated mode"
            )
        if not configuration.get('mesos', 'DEFAULT_SECRET'):
            self.log.error(
                "Expecting authentication secret in the environment")
            raise AirflowException(
                "mesos.default_secret not provided in authenticated mode")

        credential = mesos_pb2.Credential()
        credential.principal = configuration.get('mesos',
                                                 'DEFAULT_PRINCIPAL')
        credential.secret = configuration.get('mesos', 'DEFAULT_SECRET')

        # The framework registers under the same principal it authenticates as.
        framework.principal = credential.principal

        driver = mesos.native.MesosSchedulerDriver(
            AirflowMesosScheduler(self.task_queue, self.result_queue,
                                  task_cpu, task_memory), framework, master,
            implicit_acknowledgements, credential)
    else:
        # Unauthenticated mode uses a fixed principal.
        framework.principal = 'Airflow'
        driver = mesos.native.MesosSchedulerDriver(
            AirflowMesosScheduler(self.task_queue, self.result_queue,
                                  task_cpu, task_memory), framework, master,
            implicit_acknowledgements)

    self.mesos_driver = driver
    self.mesos_driver.start()
def _init_framework(self):
    """Return a ``FrameworkInfo`` describing this object to Mesos.

    The framework runs as the current user on the local host and is named
    after ``repr(self)``.
    """
    info = mesos_pb2.FrameworkInfo()
    info.user = getpass.getuser()
    info.name = repr(self)
    info.hostname = socket.gethostname()
    return info
def run_forever(conf):
    """Entrypoint to keep the framework running until terminated.

    Resets the database records, builds the framework, executor and
    (unless ``conf.disable_creds`` is set) credential, runs the Mesos
    driver in a background thread and then loops in the calling thread:
    queueing work when the job list is empty, suppressing offers while
    nothing can be scheduled and reviving them afterwards.

    Rebinds the module-level ``shutdown`` name to a fresh ``Shutdown()``;
    the loop polls ``shutdown.flag`` to detect a shutdown request.

    Calls ``sys.exit(1)`` after stopping the driver whenever
    ``queue_segments`` returns ``ERROR``.

    NOTE(review): the loop nesting below is reconstructed from a
    whitespace-collapsed source, and the function may continue past the
    last visible line - confirm against the original file.

    :param conf: configuration object; the attributes read here are
        ``l2_db_con``, ``minscenesperseg``, ``segment_query``,
        ``framework_user``, ``mesos_principal``, ``mesos_role``,
        ``mesos_secret``, ``disable_creds`` and ``master``.
    """
    logger.info('******************Start************')
    logger.debug('DB connection: %s', conf.l2_db_con)
    logger.debug("Minimum Senes Per Seg: %s", conf.minscenesperseg)
    logger.debug('Segment query: %s', conf.segment_query)

    global shutdown
    db.reset_records(db.connect(conf.l2_db_con))

    # Establish framework, executor, and authentication credentials
    framework = mesos_pb2.FrameworkInfo()
    framework.user = conf.framework_user
    framework.name = "ARD Tile Framework"
    framework.principal = conf.mesos_principal
    framework.role = conf.mesos_role

    executor = mesos_pb2.ExecutorInfo()
    executor.executor_id.value = "default"
    executor.name = "ARD Tile executor"

    implicit_acks = 1
    scheduler = ArdTileScheduler(implicit_acks, executor, conf)

    if not conf.disable_creds:
        logger.info("             MESOS creds ENABLED")

        credential = mesos_pb2.Credential()
        credential.principal = conf.mesos_principal
        credential.secret = conf.mesos_secret
        driver = mesos.native.MesosSchedulerDriver(scheduler, framework,
                                                   conf.master,
                                                   implicit_acks,
                                                   credential)
    else:
        logger.info("             MESOS creds disabled")
        driver = mesos.native.MesosSchedulerDriver(scheduler, framework,
                                                   conf.master,
                                                   implicit_acks)

    shutdown = Shutdown()

    def run_driver_async():
        """Thread for async communication with Mesos offers."""
        # driver.run() blocks, so run it in a separate thread.
        status = 0 if driver.run() == mesos_pb2.DRIVER_STOPPED else 1
        driver.stop()
        # NOTE(review): this runs in a worker thread, where sys.exit()
        # raises SystemExit in that thread only - presumably the status
        # does not become the process exit code; confirm.
        sys.exit(status)

    framework_thread = Thread(target=run_driver_async, args=())
    framework_thread.start()

    while framework_thread.is_alive():
        # If a shutdown has been requested, suppress offers and wait for the
        # framework thread to complete.
        if shutdown.flag:
            logger.info("Shutdown requested....")
            driver.suppressOffers()
            while framework_thread.is_alive():
                logger.debug("Thread alive, sleep 5....")
                time.sleep(5)
            break

        # If the job queue is empty, get work.
        if (not scheduler.jobs and queue_segments(scheduler.jobs, conf,
                                                  db.connect(conf.l2_db_con)) == ERROR):
            driver.stop(True)
            sys.exit(1)

        # If there's no new work to be done or the max number of jobs are
        # already running, suppress offers and wait for some jobs to finish.
        if (not scheduler.jobs or not scheduler.scheduling_allowed()):
            logger.info("No jobs or scheduling not allowed....")
            driver.suppressOffers()
            # First wait until scheduling is allowed again...
            while not scheduler.scheduling_allowed():
                logger.debug("Scheduling not alive, sleep 20....")
                time.sleep(20)
            # ...then keep trying to queue work until there is some.
            while not scheduler.jobs:
                if queue_segments(scheduler.jobs, conf,
                                  db.connect(conf.l2_db_con)) == ERROR:
                    driver.stop(True)
                    sys.exit(1)
                time.sleep(20)

            driver.reviveOffers()