def send_to_marathon(request):
    """Run one Marathon action submitted through a POST form.

    Reads ``action`` and ``id`` from ``request.POST`` and calls the
    matching ``MarathonClient`` method.  Supported actions: ``stop``,
    ``start``, ``destroy`` (requires the ``auth.can_init_app``
    permission), ``restart``, ``scale`` (reads ``number_instance``),
    ``update`` (reads ``cpus``, ``mem`` and ``version``) and
    ``stop-deployment``.

    Returns:
        An ``HttpResponse`` whose body is a JSON object with a ``status``
        of ``"success"`` or ``"error"`` and a human-readable ``msg``.
        Any exception raised while performing the action is reported as
        an error response rather than propagated.
    """
    # Local import so the block does not depend on the module's import list.
    import json

    if request.method != 'POST':
        # Previously this path hit an unbound local and raised NameError.
        return HttpResponse(json.dumps({
            "status": "error",
            "msg": "only POST requests are supported",
        }))

    action = request.POST.get('action', None)
    app_id = request.POST.get('id', None)
    # The action is echoed back to the caller, so escape it the same way
    # the error text is escaped.
    shown_action = html.escape(str(action))

    try:
        mc = MarathonClient('http://{}:{}'.format(
            settings.MARATHON['host'], settings.MARATHON['port']))
        if action == 'stop':
            mc.scale_app(app_id, 0, force=True)
        elif action == 'start':
            mc.scale_app(app_id, 1)
        elif action == 'destroy':
            if request.user.has_perm("auth.can_init_app"):
                mc.delete_app(app_id)
            else:
                raise PermissionDenied
        elif action == 'restart':
            mc.restart_app(app_id)
        elif action == 'scale':
            mc.scale_app(app_id, int(request.POST.get('number_instance')))
        elif action == 'update':
            app = mc.get_app(app_id)
            app.cpus = float(request.POST.get('cpus'))
            app.mem = float(request.POST.get('mem'))
            app.container.docker.image = request.POST.get('version')
            mc.update_app(app_id, app)
        elif action == "stop-deployment":
            mc.delete_deployment(app_id)
        else:
            # An unrecognised action used to be reported as a success
            # even though nothing was done.
            raise ValueError('unknown action')
        payload = {"status": "success", "msg": "%s success" % shown_action}
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (shown_action, html.escape(str(e))),
        }
    # json.dumps guarantees a well-formed body even when the message
    # contains quotes, backslashes or newlines.
    return HttpResponse(json.dumps(payload))
def update_app(app_id, config, instances=1):
    """Update an existing Marathon app with a new image and instance count.

    Fetches the current app ``app_id`` from the Marathon server at
    ``marathon_host``:``marathon_port`` and calls ``update_app`` with the
    requested number of ``instances`` and a container description built
    from ``config['image']``.

    :param app_id: Marathon id of the app; also decoded with
        ``namespacer.decode_marathon_id`` to find the service name.
    :param config: mapping that must contain an ``'image'`` key.
    :param instances: instance count forwarded to Marathon.
    """
    # Image reference passed to Marathon, prefixed with 'docker:///'.
    docker_image = 'docker:///' + config['image']

    endpoint = 'http://%s:%s' % (marathon_host, marathon_port)
    client = MarathonClient(endpoint)
    current_app = client.get_app(app_id)

    # TODO: the cassandra port mappings are hard-coded here.
    service_name = str(namespacer.decode_marathon_id(app_id)['service'])
    if service_name == "cassandra":
        docker_options = ["-p", "7000:7000",
                          "-p", "9042:9042",
                          "-p", "9160:9160",
                          "-p", "22000:22",
                          "-p", "5000:5000"]
    else:
        docker_options = []

    container_spec = {"image": docker_image, "options": docker_options}
    client.update_app(app_id, current_app,
                      instances=instances,
                      container=container_spec)
def update(service, instances=1):
    """Update the Marathon app named ``service`` with its configured image.

    The image comes from ``data['services'][service]['image']`` and the
    Marathon endpoint from ``data['marathon']``.  Progress is printed to
    standard output.

    :param service: key into ``data['services']``; also used as the
        Marathon app id.
    :param instances: instance count forwarded to Marathon.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print('updating ' + service)
    image_string = 'docker:///' + data['services'][service]['image']
    print(image_string)

    marathon_client = MarathonClient(
        'http://' + str(data['marathon']['host'])
        + ':' + str(data['marathon']['port']))
    app = marathon_client.get_app(service)

    # Cassandra is the only service that gets explicit port mappings.
    options = []
    if service == "cassandra":
        options = ["-p", "7000:7000",
                   "-p", "9042:9042",
                   "-p", "9160:9160",
                   "-p", "22000:22",
                   "-p", "5000:5000"]

    marathon_client.update_app(
        service,
        app,
        instances=instances,
        container={
            "image": image_string,
            "options": options,
        },
    )
def _update_application(client: MarathonClient, app: MarathonApp,
                        definition_path: str,
                        do_backup: bool = False) -> Union[str, bool]:
    """Push ``app`` to Marathon, optionally backing up the current one first.

    Args:
        client: Marathon client used for every API call.
        app: definition to deploy; its ``id`` selects the target app.
        definition_path: where the definition came from (only printed).
        do_backup: when true, the app currently deployed is written to
            ``./backups/<mangled id>_<timestamp>.json`` before updating.

    Returns:
        ``False`` when the final ``wait_for_deployment`` check fails,
        otherwise the backup file path (``''`` when no backup was taken).
    """
    # NOTE(review): the text of this block had lost the ``app.id``
    # arguments (and the serialisation call on the backup); they are
    # restored from context -- confirm against the original file.
    if do_backup:
        if not os.path.isdir('./backups'):
            os.mkdir('./backups/')
            print('Created backups directory')
        backup = client.get_app(app.id).to_json()
        backup_path = './backups/{}_{}.json'.format(
            mangling.appid_to_filename(app.id),
            time.strftime("%Y-%m-%d_%H:%M:%S"))
        with open(backup_path, 'w') as backup_file:
            backup_file.write(backup)
        print('\nBacked app into: {}'.format(backup_path))
    else:
        backup_path = ''

    print('Updating app: {} (from: {})'.format(app.id, definition_path))
    deployment = client.update_app(app.id, app, force=True)

    # TODO: Handle failure
    # Return the deployed backup file to build rollback order, if necessary
    # or False if a user-initiated rollback completed successfully
    if not wait_for_deployment(client, deployment):
        client.restart_app(app.id)
    if not wait_for_deployment(client, deployment):
        return False
    return backup_path
def update_app_tag(client: MarathonClient, appid: str, new_tag: str):
    """Redeploy ``appid`` with its Docker image switched to ``new_tag``.

    The current image name is split into registry, name and tag with the
    ``mangling`` helpers, rebuilt with ``new_tag``, written back to the
    app and pushed with a forced update.  The call then waits on the
    resulting deployment via ``wait_for_deployment``.
    """
    app = client.get_app(appid)

    registry, tagged_name = mangling.split_image_name(
        app.container.docker.image)
    bare_name, _old_tag = mangling.split_image_tag(tagged_name)

    app.container.docker.image = mangling.rebuild_image_name(
        registry, bare_name, new_tag)

    pending = client.update_app(appid, app, force=True)
    wait_for_deployment(client, pending)
def send_to_marathon(request):
    """Run one Marathon action submitted through a POST form.

    Reads ``action`` and ``id`` from ``request.POST`` and calls the
    matching ``MarathonClient`` method.  Supported actions: ``stop``,
    ``start``, ``destroy`` (requires the ``auth.can_init_app``
    permission), ``restart``, ``scale`` (reads ``number_instance``),
    ``update`` (reads ``cpus``, ``mem`` and ``version``) and
    ``stop-deployment``.

    Returns:
        An ``HttpResponse`` whose body is a JSON object with a ``status``
        of ``"success"`` or ``"error"`` and a human-readable ``msg``.
        Any exception raised while performing the action is reported as
        an error response rather than propagated.
    """
    # Local import so the block does not depend on the module's import list.
    import json

    if request.method != 'POST':
        # Previously this path hit an unbound local and raised NameError.
        return HttpResponse(json.dumps({
            "status": "error",
            "msg": "only POST requests are supported",
        }))

    action = request.POST.get('action', None)
    app_id = request.POST.get('id', None)
    # The action is echoed back to the caller, so escape it the same way
    # the error text is escaped.
    shown_action = html.escape(str(action))

    try:
        mc = MarathonClient('http://{}:{}'.format(
            settings.MARATHON['host'], settings.MARATHON['port']))
        if action == 'stop':
            mc.scale_app(app_id, 0, force=True)
        elif action == 'start':
            mc.scale_app(app_id, 1)
        elif action == 'destroy':
            if request.user.has_perm("auth.can_init_app"):
                mc.delete_app(app_id)
            else:
                raise PermissionDenied
        elif action == 'restart':
            mc.restart_app(app_id)
        elif action == 'scale':
            mc.scale_app(app_id, int(request.POST.get('number_instance')))
        elif action == 'update':
            app = mc.get_app(app_id)
            app.cpus = float(request.POST.get('cpus'))
            app.mem = float(request.POST.get('mem'))
            app.container.docker.image = request.POST.get('version')
            mc.update_app(app_id, app)
        elif action == "stop-deployment":
            mc.delete_deployment(app_id)
        else:
            # An unrecognised action used to be reported as a success
            # even though nothing was done.
            raise ValueError('unknown action')
        payload = {"status": "success", "msg": "%s success" % shown_action}
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (shown_action, html.escape(str(e))),
        }
    # json.dumps guarantees a well-formed body even when the message
    # contains quotes, backslashes or newlines.
    return HttpResponse(json.dumps(payload))
def re_deploy(app_name, app_file):
    """Calls marathon API to redeploy application with new file as request body

    :param app_name: Marathon id of the application to update.
    :param app_file: path of a JSON file holding the app definition.
    :return: the result of ``MarathonClient.update_app`` when
        ``_is_deployed`` reports the app as deployed, otherwise ``None``.
    """
    # NOTE(review): the read of ``content_file`` was missing from the
    # text of this block and is restored here -- confirm.
    with open(app_file, 'r') as content_file:
        content = content_file.read()
    app_attr = json.loads(content)

    marathon_addresses = _addresses()
    cli = MarathonClient(marathon_addresses)
    if not _is_deployed(cli, app_name):
        return None
    return cli.update_app(app_name, models.MarathonApp.from_json(app_attr))
def deploy(app_definition, marathon_url, instances, auth_token, zero, force):
    """Create a new Marathon app and move traffic over from older versions.

    The app definition is read from the JSON file ``app_definition`` and
    created on the Marathon server at ``marathon_url``.  Running apps
    whose id shares the same service name (last path segment up to the
    first ``.``) are treated as previous versions and are scaled down,
    either gradually (``zero == 'Yes'``) or all at once.

    :param app_definition: path of the Marathon app definition file.
    :param marathon_url: Marathon endpoint passed to ``MarathonClient``.
    :param instances: target instance count; ``None`` means "use 2 for a
        first deployment, otherwise the count of the old app".
    :param auth_token: forwarded to ``MarathonClient``.
    :param zero: ``'Yes'`` selects the zero-downtime path.
    :param force: ``'Yes'`` updates the app in place when creating it
        fails; any other value exits the process in that case.
    """
    # NOTE(review): several ``app.id`` references were missing from the
    # text of this block and are restored from context -- confirm.
    old_appids = []

    # Connect to Marathon
    print("\nConnecting to Marathon...")
    c = MarathonClient(marathon_url, auth_token=auth_token)
    print("Connected to", marathon_url)

    # Pick up the Marathon App Definition file (closed deterministically).
    with open(app_definition) as definition_file:
        app_json = definition_file.read()
    app = MarathonApp.from_json(json.loads(app_json))
    new_app_id = app.id
    service_name = new_app_id.split("/")[-1].split(".")[0]

    # Instantiate the new application on DC/OS but don't launch it yet
    # The application definition instances field should be 0 by default
    # If forced, the application will be relaunched even if the ID already
    # exists
    print("\nInstantiating new application on Marathon with",
          app.instances, "instances...")
    try:
        c.create_app(new_app_id, app)
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C and SystemExit are
        # no longer swallowed.
        if force == 'Yes':
            print("\nForcing redeploy of the same app id...", new_app_id)
            c.update_app(new_app_id, app, force=True, minimal=True)
            check_deployment(c, new_app_id)
        else:
            # NOTE(review): this exits with status 0 and without a
            # message although creation failed -- confirm that is wanted.
            sys.exit()
    print("Created app", new_app_id)

    # List and find currently running apps of the same service
    # This assumes the naming convention (id):
    # /some/group/service_name.uniquevalue
    print("\nFinding any existing apps for service:", service_name)
    for existing_app in c.list_apps():
        existing_service_name = existing_app.id.split("/")[-1].split(".")[0]
        if (service_name == existing_service_name
                and existing_app.instances > 0):
            print("Found up and running application id:", existing_app.id)
            old_appids.append(existing_app.id)

    # If it's the first deployment ever, just launch the desired number of
    # instances
    # Otherwise perform a hybrid release
    # Finally clean up any older app instances running
    if not old_appids:
        if instances is None:
            instances = 2
        print("No current apps found. Launching brand new service with",
              instances, "instances...")
        c.scale_app(new_app_id, instances=instances)
        check_deployment(c, new_app_id)
        check_health(c, new_app_id)
    else:
        old_appids.reverse()
        if zero == 'Yes':
            print("\nStarting zero downtime deployment for...", new_app_id)
            for old_appid in old_appids:
                if instances is None:
                    instances = c.get_app(old_appid).instances
                if (old_appid == '' or old_appid == new_app_id
                        or old_appid == '/' + new_app_id):
                    print("Scaling existing app_id", new_app_id, "to",
                          instances, "instances...")
                    c.scale_app(new_app_id, instances=instances)
                    check_deployment(c, new_app_id)
                    check_health(c, new_app_id)
                else:
                    print("Target number of total instances:", instances)
                    # Move half of the target (at least one instance)
                    # over first.
                    delta = int(round(instances * .50))
                    delta = (delta if delta > 0 else 1)
                    scale(c, new_app_id, old_appid, delta)
                    if c.get_app(new_app_id).instances != instances:
                        print("\nLaunch", instances - delta,
                              "remaining instance(s) of the new version...")
                        c.scale_app(new_app_id, instances=instances)
                        check_deployment(c, new_app_id)
                        check_health(c, new_app_id)
                    if c.get_app(old_appid).instances > 0:
                        print(
                            "Finish shutting down remaining instances of "
                            "the old version..."
                        )
                        c.scale_app(old_appid, instances=0)
                        check_deployment(c, old_appid)
        else:
            print("Started deployment with downtime...")
            for old_appid in old_appids:
                if instances is None:
                    # Same fallback as the zero-downtime path; without
                    # it ``instances=None`` reached scale_app below.
                    instances = c.get_app(old_appid).instances
                c.scale_app(old_appid, instances=0)
                check_deployment(c, old_appid)
            c.scale_app(new_app_id, instances=instances)
            check_deployment(c, new_app_id)
            check_health(c, new_app_id)

    print("\nSUCCESS:\nNew application ID:", new_app_id,
          "\nRunning instances:", instances)
class MarathonWorkers(object):
    """Manage a group of dask workers deployed as a single Marathon app.

    :param scheduler: object exposing ``address`` and ``worker_info``.
    :param marathon: Marathon endpoint passed to ``MarathonClient``.
    :param name: Marathon app id; defaults to ``dask-<uuid4>``.
    :param nprocs: value for the worker's ``--nprocs`` option.
    :param nthreads: value for the worker's ``--nthreads`` option.
    :param docker: Docker image the workers run in.
    :param volumes: values emitted as Docker ``volume`` parameters.
    :param kwargs: extra keyword arguments forwarded to ``MarathonApp``;
        a ``mem`` entry also sets the worker's ``--memory-limit``.
    """

    # NOTE(review): the ``self.name`` references and the two logging
    # calls were missing from the text of this class and are restored
    # from context; the original logger object is unknown, so a
    # module-named logger is used -- confirm.

    def __init__(self, scheduler, marathon, name=None, nprocs=1, nthreads=0,
                 docker='daskos/daskathon', volumes=None, **kwargs):
        self.scheduler = scheduler
        # Single worker thread, so submitted Marathon calls run one at a
        # time.
        self.executor = ThreadPoolExecutor(1)
        self.client = MarathonClient(marathon)
        self.name = name or 'dask-%s' % uuid.uuid4()
        self.docker = docker
        # ``None`` replaces the former shared mutable default ``[]``.
        self.volumes = volumes if volumes is not None else []
        self.nprocs = nprocs
        self.nthreads = nthreads
        self.options = kwargs

    @staticmethod
    def _logger():
        """Return the logger used for start/stop messages."""
        import logging
        return logging.getLogger(__name__)

    def start(self, nworkers=0):
        """Create or update the Marathon app with ``nworkers`` instances."""
        args = ['dask-worker', self.scheduler.address,
                # use Mesos task ID as worker name
                '--name', '$MESOS_TASK_ID',
                '--worker-port', '$PORT_WORKER',
                '--bokeh-port', '$PORT_BOKEH',
                '--nanny-port', '$PORT_NANNY',
                '--nprocs', str(self.nprocs),
                '--nthreads', str(self.nthreads)]

        ports = [{'port': 0, 'protocol': 'tcp', 'name': name}
                 for name in ['worker', 'nanny', 'http', 'bokeh']]

        # No health checks are configured for the workers.
        healths = []

        if 'mem' in self.options:
            # The worker's limit is 80% of the ``mem`` option, scaled by
            # 1e6.
            args.extend(['--memory-limit',
                         str(int(self.options['mem'] * 0.8 * 1e6))])

        docker_parameters = [{"key": "volume", "value": v}
                             for v in self.volumes]
        container = MarathonContainer({'image': self.docker,
                                       'forcePullImage': True,
                                       'parameters': docker_parameters})
        command = ' '.join(args)

        app = MarathonApp(instances=nworkers, container=container,
                          port_definitions=ports, cmd=command,
                          health_checks=healths,
                          **self.options)
        self.client.update_app(self.name, app)
        self._logger().info(
            'Started marathon workers {}'.format(self.name))

    def close(self):
        """Delete the Marathon app (forced)."""
        self._logger().info(
            'Stopping marathon workers {}'.format(self.name))
        self.client.delete_app(self.name, force=True)

    def scale_up(self, n):
        """Asynchronously scale the app to ``n`` instances."""
        self.executor.submit(self.client.scale_app, self.name, instances=n)

    def scale_down(self, workers):
        """Asynchronously kill the tasks behind ``workers`` and scale down.

        Each worker is looked up in ``scheduler.worker_info`` and its
        ``'name'`` entry is passed to ``kill_task`` as the task id.
        """
        for worker in workers:
            self.executor.submit(self.client.kill_task,
                                 self.name,
                                 self.scheduler.worker_info[worker]['name'],
                                 scale=True)
def update_app_from_json(self, json_data, force):
    """Update a Marathon app from an already-parsed JSON definition.

    :param json_data: app definition accepted by ``MarathonApp.from_json``.
    :param force: forwarded to ``MarathonClient.update_app``.
    :return: whatever ``MarathonClient.update_app`` returns.
    """
    app = MarathonApp.from_json(json_data)
    # NOTE(review): the app-id argument was missing from the text of
    # this call; the parsed app's own id is restored here -- confirm.
    return MarathonClient.update_app(self, app.id, app, force)
verify=False) except MarathonError as e: logging.error("Failed to connect to Marathon! {}".format(e)) exit_code = 1 sys.exit(exit_code)"Deploying application...") try: app = client.get_app(marathon_app_id) except MarathonHttpError: response = client.create_app(marathon_app_id, app_definition) version = response.version depolyment_id = response.deployments[0].id else: response = client.update_app(marathon_app_id, app_definition, force=marathon_force) version = response['version'] deployment_id = response['deploymentId']"New version deployed: {}".format(version)) if app_definition.instances == 0: "Deactivated application by setting instances to 0, deployment complete." ) exit_code = 0 sys.exit(exit_code) ### Get newly created Mesos task
def deploy(marathon, name, docker, volume, scheduler_cpus, scheduler_mem,
           adaptive, port, bokeh_port, constraint, maximum_over_capacity,
           minimum_health_capacity, label, uri, jupyter, **kwargs):
    """Deploy a dask scheduler (and optionally Jupyter) as Marathon apps.

    The scheduler app runs ``daskathon run`` with a command line built
    from the remaining keyword arguments and the explicit options below;
    it is registered as ``<name>-scheduler``.  When ``jupyter`` is true,
    a copy of that app running a notebook server is registered as
    ``<name>-jupyter``.

    :param marathon: Marathon endpoint; also the last CLI argument.
    :param name: base name; defaults to ``daskathon-<4 uuid chars>``.
    :param docker: Docker image for the apps.
    :param volume: iterable of volume specs (CLI flags and Docker
        ``volume`` parameters).
    :param scheduler_cpus: ``cpus`` of the scheduler app.
    :param scheduler_mem: ``mem`` of the scheduler app.
    :param adaptive: append ``--adaptive`` to the command line.
    :param port: scheduler port.
    :param bokeh_port: bokeh port.
    :param constraint: iterable of ``a:b[:c]`` constraint strings.
    :param maximum_over_capacity: upgrade-strategy value.
    :param minimum_health_capacity: upgrade-strategy value.
    :param label: iterable of ``key:value`` strings.
    :param uri: iterable of URIs for the app.
    :param jupyter: also deploy the notebook app when true.
    :param kwargs: additional options turned into ``--key value`` flags.
    """
    name = name or 'daskathon-{}'.format(str(uuid.uuid4())[-4:])

    kwargs['name'] = '{}-workers'.format(name)
    kwargs['docker'] = docker
    kwargs['port'] = port
    kwargs['bokeh_port'] = bokeh_port

    # Every non-empty option becomes a ("--flag", "value") pair.
    args = [('--{}'.format(k.replace('_', '-')), str(v))
            for k, v in kwargs.items() if v not in (None, '')]

    for c in constraint:
        args.append(('--constraint', c))
    for u in uri:
        args.append(('--uri', u))
    for v in volume:
        args.append(('--volume', v))
    if maximum_over_capacity:
        args.append(('--maximum-over-capacity', str(maximum_over_capacity)))
    if minimum_health_capacity:
        args.append(
            ('--minimum-health-capacity', str(minimum_health_capacity)))

    # Flatten the pairs into one argument list.
    args = list(concat(args))
    if adaptive:
        args.append('--adaptive')

    client = MarathonClient(marathon)
    docker_parameters = [{"key": "volume", "value": v} for v in volume]
    container = MarathonContainer({
        'image': docker,
        'forcePullImage': True,
        'parameters': docker_parameters
    })
    args = ['daskathon', 'run'] + args + [marathon]
    cmd = ' '.join(args)

    healths = [{
        'portIndex': i,
        'protocol': 'TCP'
    } for i, _ in enumerate(['scheduler', 'bokeh'])]

    services = [('scheduler', port), ('bokeh', bokeh_port)]
    ports = [{
        'port': p,
        'protocol': 'tcp',
        'name': service
    } for (service, p) in services]

    constraints = [c.split(':')[:3] for c in constraint]
    # Split on the first ':' only so label values may contain colons
    # (previously those made dict() raise ValueError).
    labels = {key: value
              for key, value in (item.split(':', 1) for item in label)}
    upgrade_strategy = {
        'maximum_over_capacity': maximum_over_capacity,
        'minimum_health_capacity': minimum_health_capacity
    }

    scheduler = MarathonApp(instances=1,
                            container=container,
                            cpus=scheduler_cpus,
                            mem=scheduler_mem,
                            task_kill_grace_period_seconds=20,
                            port_definitions=ports,
                            health_checks=healths,
                            constraints=constraints,
                            upgrade_strategy=upgrade_strategy,
                            labels=labels,
                            uris=uri,
                            require_ports=True,
                            cmd=cmd)
    client.update_app('{}-scheduler'.format(name), scheduler)

    if jupyter:
        # NOTE(review): the address after ``--ip`` was missing from the
        # text of this command; 0.0.0.0 is restored -- confirm.
        cmd = ('jupyter notebook --allow-root --no-browser '
               '--NotebookApp.token=\'\' --ip 0.0.0.0 --port $PORT_NOTEBOOK')
        ports = [{'port': 0, 'protocol': 'tcp', 'name': 'notebook'}]
        # Separate name so the ``jupyter`` flag is not rebound to an app.
        notebook_app = deepcopy(scheduler)
        notebook_app.cmd = cmd
        notebook_app.port_definitions = ports
        client.update_app('{}-jupyter'.format(name), notebook_app)
class MarathonSpawner(Spawner):
    """JupyterHub spawner that runs each user's server as a Marathon app.

    The app id is ``/<app_prefix>/<user name>/notebook``.  Resource
    requests (cpu, mem, disk, gpu) and the image come from the options
    form; everything else comes from the configurable traits below.
    """

    # NOTE(review): the text of this class had lost ``self.user.name``
    # in ``app_id``, three ``self.log.info(`` calls and the address
    # returned by ``_ip_default``; they are restored from context --
    # confirm against the original file.

    app_image = Unicode("jupyterhub/singleuser:%s" % _jupyterhub_xy,
                        config=True)

    app_prefix = Unicode("jupyter", help=dedent("""
        Prefix for app names. The full app name for a particular
        user will be <prefix>/<username>/notebook.
        """)).tag(config=True)

    marathon_host = Unicode(
        u'', help="Hostname of Marathon server").tag(config=True)

    marathon_constraints = List(
        [],
        help='Constraints to be passed through to Marathon').tag(config=True)

    unreachable_strategy = Any(
        None,
        help='Unreachable strategy to be passed through to Marathon').tag(
            config=True)

    volumes = List([], help=dedent("""
        A list in Marathon REST API format for mounting volumes into the
        docker container.
        [
            {
                "containerPath": "/foo",
                "hostPath": "/bar",
                "mode": "RW"
            }
        ]

        Note that using the template variable {username} in containerPath,
        hostPath or the name variable in case it's an external drive
        it will be replaced with the current user's name.
        """)).tag(config=True)

    # Defaults and upper bounds offered by the options form.
    max_cpu = Float(2, config=True)
    cpu = Float(1, config=True)

    max_mem = Float(4096, config=True)
    mem = Float(1024, config=True)

    max_disk = Float(20000, config=True)
    disk = Float(5000, config=True)

    max_gpu = Integer(0, config=True)
    gpu = Integer(0, config=True)

    mesos_user = Unicode(None, config=True, allow_none=True)

    autotimeout = Integer(
        None,
        help="Seconds to automatically timeout unused notebook servers",
        config=True,
        allow_none=True)

    hub_ip_connect = Unicode(
        "", help="Public IP address of the hub").tag(config=True)

    @observe('hub_ip_connect')
    def _ip_connect_changed(self, change):
        if jupyterhub.version_info >= (0, 8):
            warnings.warn(
                "MarathonSpawner.hub_ip_connect is no longer needed with JupyterHub 0.8."
                "  Use JupyterHub.hub_connect_ip instead.",
                DeprecationWarning,
            )

    hub_port_connect = Integer(-1,
                               help="Public PORT of the hub").tag(config=True)

    @observe('hub_port_connect')
    def _port_connect_changed(self, change):
        if jupyterhub.version_info >= (0, 8):
            warnings.warn(
                "MarathonSpawner.hub_port_connect is no longer needed with JupyterHub 0.8."
                "  Use JupyterHub.hub_connect_port instead.",
                DeprecationWarning,
            )

    format_volume_name = Any(
        help="""Any callable that accepts a string template and a Spawner
        instance as parameters in that order and returns a string.
        """).tag(config=True)

    @default('format_volume_name')
    def _get_default_format_volume_name(self):
        return default_format_volume_name

    # fix default port to 8888, used in the container
    @default('port')
    def _port_default(self):
        return 8888

    # default to listening on all-interfaces in the container
    @default('ip')
    def _ip_default(self):
        return '0.0.0.0'

    # One thread pool shared by every spawner instance, created lazily.
    _executor = None

    @property
    def executor(self):
        """Return the class-wide thread pool, creating it on first use."""
        cls = self.__class__
        if cls._executor is None:
            cls._executor = ThreadPoolExecutor(5)
        return cls._executor

    def __init__(self, *args, **kwargs):
        super(MarathonSpawner, self).__init__(*args, **kwargs)
        self.marathon = MarathonClient(self.marathon_host)
        # get_state() also assigns ``self.stored_user_options``.
        self.get_state()

    @property
    def app_id(self):
        """Marathon id of this user's notebook app."""
        return '/%s/%s/notebook' % (self.app_prefix, self.user.name)

    def get_state(self):
        """Return the persisted state, including the user's options."""
        state = super(MarathonSpawner, self).get_state()
        state['user_options'] = self.stored_user_options = self.user_options
        return state

    def load_state(self, state):
        """Restore ``stored_user_options`` from persisted ``state``."""
        super(MarathonSpawner, self).load_state(state)
        self.stored_user_options = state.get('user_options', {})

    def get_health_checks(self):
        """Return the single TCP health check used for the notebook app."""
        return [
            MarathonHealthCheck(protocol='TCP',
                                port_index=0,
                                grace_period_seconds=300,
                                interval_seconds=30,
                                timeout_seconds=20,
                                max_consecutive_failures=0)
        ]

    def get_volumes(self):
        """Build Marathon volumes from ``self.volumes``.

        Container path, host path and (if present) the external volume
        name are passed through ``format_volume_name``.
        """
        volumes = []
        for v in self.volumes:
            mv = MarathonContainerVolume.from_json(v)
            mv.container_path = self.format_volume_name(
                mv.container_path, self)
            mv.host_path = self.format_volume_name(mv.host_path, self)
            if mv.external and 'name' in mv.external:
                mv.external['name'] = self.format_volume_name(
                    mv.external['name'], self)
            volumes.append(mv)
        return volumes

    def get_constraints(self):
        """Convert ``marathon_constraints`` to ``MarathonConstraint``s."""
        return [MarathonConstraint.from_json(c)
                for c in self.marathon_constraints]

    def get_ip_and_port(self, app_info):
        """Return ``(ip, port)`` of the app's single task.

        The task's host name is resolved with ``socket.gethostbyname``
        and the first task port is used.
        """
        assert len(app_info.tasks) == 1
        ip = socket.gethostbyname(app_info.tasks[0].host)
        return (ip, app_info.tasks[0].ports[0])

    @run_on_executor
    def get_app_info(self, app_id):
        """Fetch the app (with tasks) on the executor; ``None`` if absent."""
        try:
            app = self.marathon.get_app(app_id, embed_tasks=True)
        except NotFoundError:
            self.log.info("The %s application has not been started yet",
                          app_id)
            return None
        else:
            return app

    def _public_hub_api_url(self):
        """Return the hub API URL with the public ip/port overrides applied."""
        uri = urlparse(self.hub.api_url)
        port = self.hub_port_connect if self.hub_port_connect > 0 else uri.port
        ip = self.hub_ip_connect if self.hub_ip_connect else uri.hostname
        return urlunparse((uri.scheme, '%s:%s' % (ip, port), uri.path,
                           uri.params, uri.query, uri.fragment))

    def get_args(self):
        """Return the single-user server arguments adjusted for Marathon.

        ``--hub-api-url`` is replaced by the public URL when
        ``hub_ip_connect`` is set, and ``--port`` is always replaced by
        ``--port=$PORT0``.
        """
        args = super().get_args()
        if self.hub_ip_connect:
            # JupyterHub 0.7 specifies --hub-api-url
            # on the command-line, which is hard to update
            for idx, arg in enumerate(list(args)):
                if arg.startswith('--hub-api-url='):
                    args.pop(idx)
                    break
            args.append('--hub-api-url=%s' % self._public_hub_api_url())
        for idx, arg in enumerate(list(args)):
            if arg.startswith('--port='):
                args.pop(idx)
                break
        args.append('--port=$PORT0')
        return args

    def options_from_form(self, formdata):
        """Convert submitted form data into the ``user_options`` dict."""
        options = {}
        options['app_image'] = formdata['app_image'][0] or None
        if 'force_pull_image' in formdata:
            options['force_pull_image'] = (
                formdata['force_pull_image'][0] == 'on')
        options['cpu'] = float(formdata['cpu'][0])
        options['mem'] = float(formdata['mem'][0])
        options['disk'] = float(formdata['disk'][0])
        if formdata.get('gpu', None):
            options['gpu'] = int(formdata['gpu'][0])
        return options

    @property
    def options_form(self):
        """HTML for the spawn options form, pre-filled from stored options.

        The GPU field is only included when ``max_gpu`` is positive.
        """
        template = """
        <div class="form-group">
            <label for="app_image">Image <span class="label label-default">Optional</span></label>
            <input id="app_image" class="form-control" name="app_image" type="text" placeholder="e.g. %(default_app_image)s" value="%(app_image)s" />
        </div>
        <div class="checkbox">
            <label for="force_pull_image">
                <input id="force_pull_image" name="force_pull_image" type="checkbox" value="on" />
                Force pull image
            </label>
        </div>
        <div class="form-group">
            <div class="row">
                <div class="col-sm-4">
                    <label for="cpu">CPU</label>
                    <input id="cpu" class="form-control" name="cpu" type="number" step="any" value="%(cpu)s" min="%(min_cpu)s" max="%(max_cpu)s" required />
                </div>
                <div class="col-sm-4">
                    <label for="mem">Mem (MiB)</label>
                    <input id="mem" class="form-control" name="mem" type="number" step="any" value="%(mem)s" min="%(min_mem)s" max="%(max_mem)s" required />
                </div>
                <div class="col-sm-4">
                    <label for="disk">Disk (MiB)</label>
                    <input id="disk" class="form-control" name="disk" type="number" step="any" value="%(disk)s" min="%(min_disk)s" max="%(max_disk)s" required />
                </div>
            </div>
        </div>
        """ % {
            'default_app_image': self.app_image,
            'app_image': self.stored_user_options.get('app_image', None) or '',
            'min_cpu': 0.001,
            'max_cpu': self.max_cpu,
            'cpu': remove_zeros(
                str(self.stored_user_options.get('cpu', self.cpu))),
            'min_mem': 32,
            # The mem field previously rendered a literal "$(max_mem)s".
            'max_mem': self.max_mem,
            'mem': remove_zeros(
                str(self.stored_user_options.get('mem', self.mem))),
            'min_disk': 1000,
            'max_disk': self.max_disk,
            'disk': remove_zeros(
                str(self.stored_user_options.get('disk', self.disk))),
        }
        if self.max_gpu > 0:
            template += """
            <div class="form-group">
                <div class="row">
                    <div class="col-sm-4">
                        <label for="gpu">GPU</label>
                        <input id="gpu" class="form-control" name="gpu" type="number" step="1" value="%(gpu)s" min="%(min_gpu)s" max="%(max_gpu)s" required />
                    </div>
                </div>
            </div>
            """ % {
                'min_gpu': 0,
                'max_gpu': self.max_gpu,
                'gpu': self.stored_user_options.get('gpu', self.gpu),
            }
        return """<div>%s</div>""" % template

    @gen.coroutine
    def start(self):
        """Create or update the user's Marathon app and wait until healthy.

        Returns:
            ``(ip, port)`` of the running task.

        Raises:
            MarathonSpawnerException: the app disappeared or has no
                instances while waiting.
            Exception: whatever the Marathon client raised while
                creating or updating the app (logged, then re-raised).
        """
        app_image = self.user_options.get('app_image', None) or self.app_image
        force_pull_image = self.user_options.get('force_pull_image', False)
        self.log.info("starting a Marathon app with image=%s" % app_image)

        container_params = {
            'image': app_image,
            'force_pull_image': force_pull_image
        }
        docker_container = MarathonDockerContainer(**container_params)

        app_container = MarathonContainer(docker=docker_container,
                                          type='MESOS',
                                          volumes=self.get_volumes())

        cpu = self.user_options.get('cpu', None)
        mem = self.user_options.get('mem', None)
        disk = self.user_options.get('disk', None)
        gpu = self.user_options.get('gpu', None)
        self.log.info("resource: (cpu=%s, mem=%s, disk=%s, gpu=%s)" %
                      (cpu, mem, disk, gpu))

        cmd = self.cmd + self.get_args()
        env = self.get_env()

        port_definitions = [PortDefinition(port=0, protocol='tcp')]

        app_request = MarathonApp(
            id=self.app_id,
            # cmd does not use Docker image's default entrypoint
            cmd=' '.join(cmd),
            env=env,
            cpus=cpu,
            mem=mem,
            disk=disk,
            gpus=gpu,
            user=self.mesos_user,
            container=app_container,
            port_definitions=port_definitions,
            networks=[{
                'mode': 'host'
            }],
            constraints=self.get_constraints(),
            health_checks=self.get_health_checks(),
            unreachable_strategy=self.unreachable_strategy,
            instances=1)

        # get_app_info runs on the executor and returns a future; it must
        # be yielded, otherwise the (always truthy) future made the
        # create_app branch unreachable.
        app_info = yield self.get_app_info(self.app_id)
        try:
            if app_info:
                self.marathon.update_app(self.app_id, app_request, force=True)
            else:
                self.marathon.create_app(self.app_id, app_request)
        except Exception as e:
            self.log.error("Failed to create application for %s: %s",
                           self.app_id, e)
            raise

        # Poll once per second until exactly one task is healthy.
        while True:
            app_info = yield self.get_app_info(self.app_id)
            if app_info is None:
                raise MarathonSpawnerException(
                    "Application %s is lost" % self.app_id)
            elif app_info.instances == 0:
                raise MarathonSpawnerException(
                    "No instance for application %s" % self.app_id)
            elif app_info.tasks_healthy == 1:
                ip, port = self.get_ip_and_port(app_info)
                break
            yield gen.sleep(1)
        return (ip, port)

    @gen.coroutine
    def stop(self, now=False):
        """Scale the app to zero instances.

        Unless ``now`` is true, wait until the app has no pending
        deployments (or no longer exists).
        """
        try:
            self.marathon.update_app(self.app_id,
                                     MarathonApp(instances=0),
                                     force=True)
        except Exception:
            self.log.error("Failed to delete application %s", self.app_id)
            raise
        else:
            if not now:
                while True:
                    app_info = yield self.get_app_info(self.app_id)
                    if app_info is None:
                        # Stopping application is lost, just ignore it!
                        break
                    elif len(app_info.deployments) == 0:
                        # This is the success case.
                        break
                    yield gen.sleep(1)

    @gen.coroutine
    def poll(self):
        """Report the app's status.

        Returns:
            ``None`` while the app is running; ``3`` when the app is
            missing, ``1`` when it is being stopped, ``2`` when it has no
            healthy task, ``0`` when it was stopped here because the user
            was inactive for longer than ``autotimeout`` seconds.
        """
        app_info = yield self.get_app_info(self.app_id)

        if app_info is None:
            self.log.error("Application %s is lost", self.app_id)
            return 3

        for deployment in app_info.deployments:
            for current_action in deployment.current_actions:
                if current_action.action == 'StopApplication':
                    self.log.error("Application %s is shutting down",
                                   self.app_id)
                    return 1

        if app_info.tasks_healthy == 0:
            self.log.error("No healthy instance for application %s",
                           self.app_id)
            return 2

        if self.autotimeout is not None:
            tm_diff = datetime.utcnow() - self.user.last_activity
            # total_seconds() instead of .seconds, which wraps after a day.
            self.log.debug("Application %s is inactive for %d sec",
                           self.app_id, tm_diff.total_seconds())
            if tm_diff > timedelta(seconds=self.autotimeout):
                self.log.info(
                    "Stopping application %s because it's inactive for more than %d sec",
                    self.app_id, self.autotimeout)
                # Do not yield the result of stop here
                self.stop()
                return 0

        return None
def update_app_from_json(self, json_data, force):
    """Update a Marathon app from an already-parsed JSON definition.

    :param json_data: app definition accepted by ``MarathonApp.from_json``.
    :param force: forwarded to ``MarathonClient.update_app``.
    :return: whatever ``MarathonClient.update_app`` returns.
    """
    app = MarathonApp.from_json(json_data)
    # NOTE(review): the app-id argument was missing from the text of
    # this call; the parsed app's own id is restored here -- confirm.
    return MarathonClient.update_app(self, app.id, app, force)