Example #1
0
def send_to_marathon(request):
    """Run a Marathon action requested through a POST form and report the outcome.

    Reads ``action`` and ``id`` from ``request.POST`` and dispatches to the
    matching ``MarathonClient`` call (``stop``, ``start``, ``destroy``,
    ``restart``, ``scale``, ``update`` or ``stop-deployment``). An action that
    matches none of these performs no Marathon call and is still reported as
    a success.

    :param request: the incoming request; only ``POST`` is acted upon.
    :return: an ``HttpResponse`` whose body is a JSON object with ``status``
        (``"success"`` or ``"error"``) and ``msg`` keys. Every exception,
        including the ``PermissionDenied`` raised when the user lacks
        ``auth.can_init_app`` for ``destroy``, is turned into an error
        payload instead of being propagated.
    """
    import json  # function-scope import so the block brings its own dependency

    # Bound before the try so the error handler can always reference it.
    action = None
    try:
        if request.method != 'POST':
            # Without this guard ``result`` was never assigned for non-POST
            # requests and the final return raised UnboundLocalError.
            return HttpResponse(json.dumps({
                "status": "error",
                "msg": "only POST requests are supported",
            }))

        action = request.POST.get('action', None)
        app_id = request.POST.get('id', None)
        mc = MarathonClient('http://{}:{}'.format(
            settings.MARATHON['host'], settings.MARATHON['port']))

        if action == 'stop':
            mc.scale_app(app_id, 0, force=True)
        elif action == 'start':
            mc.scale_app(app_id, 1)
        elif action == 'destroy':
            if not request.user.has_perm("auth.can_init_app"):
                raise PermissionDenied
            mc.delete_app(app_id)
        elif action == 'restart':
            mc.restart_app(app_id)
        elif action == 'scale':
            mc.scale_app(app_id, int(request.POST.get('number_instance')))
        elif action == 'update':
            app = mc.get_app(app_id)
            app.cpus = float(request.POST.get('cpus'))
            app.mem = float(request.POST.get('mem'))
            app.container.docker.image = request.POST.get('version')
            mc.update_app(app_id, app)
        elif action == "stop-deployment":
            mc.delete_deployment(app_id)

        payload = {
            "status": "success",
            # ``action`` is client-supplied and echoed back, so escape it the
            # same way the error text is escaped.
            "msg": "%s success" % html.escape(str(action)),
        }
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape(str(action)),
                                    html.escape(str(e))),
        }
    # json.dumps handles quotes, backslashes and newlines, which the former
    # %-built string did not, so the body is always valid JSON.
    return HttpResponse(json.dumps(payload))
Example #2
0
def update_app(app_id, config, instances = 1):
	"""Update an existing Marathon app with the image named in ``config``.

	Fetches the current app for ``app_id`` from the Marathon server at the
	module-level ``marathon_host``/``marathon_port`` and calls
	``update_app`` on it with the requested instance count and a container
	spec built from ``config['image']``.

	When ``namespacer.decode_marathon_id(app_id)`` reports the service as
	"cassandra", a fixed set of ``-p`` port mappings is passed as container
	options; every other service gets an empty option list.
	"""
	docker_image = 'docker:///' + config['image']
	endpoint = 'http://' + str(marathon_host) + ':' + str(marathon_port)
	client = MarathonClient(endpoint)
	current_app = client.get_app(app_id)

	# Cassandra is the only service that needs explicit port mappings.
	service_name = str(namespacer.decode_marathon_id(app_id)['service'])
	if service_name == "cassandra":
		port_mappings = ("7000:7000", "9042:9042", "9160:9160", "22000:22", "5000:5000")
		container_options = [token for mapping in port_mappings for token in ("-p", mapping)]
	else:
		container_options = []

	container_spec = {"image" : docker_image, "options" : container_options}
	client.update_app(app_id, current_app, instances = instances, container = container_spec)
Example #3
0
def update(service, instances = 1):
	"""Update the Marathon app for ``service`` using the module-level ``data`` config.

	The docker image comes from ``data['services'][service]['image']`` and
	the Marathon endpoint from ``data['marathon']['host']`` and
	``data['marathon']['port']``. The existing app is fetched and passed to
	``update_app`` together with the instance count and a container spec.

	For the "cassandra" service a fixed set of ``-p`` port mappings is
	passed as container options; other services get an empty list.
	"""
	# Parenthesised single-argument print emits the same text on Python 2
	# and is also valid on Python 3, where the statement form is a
	# SyntaxError.
	print('updating ' + service)
	image_string = 'docker:///' + data['services'][service]['image']
	print(image_string)
	#
	# set up marathon client and launch container
	#
	marathon_client = MarathonClient('http://' + str(data['marathon']['host']) + ':' + str(data['marathon']['port']))
	app = marathon_client.get_app(service)
	#
	# set up options for cassandra
	#
	options = []
	if service == "cassandra":
		options = ["-p", "7000:7000", "-p", "9042:9042", "-p", "9160:9160", "-p", "22000:22", "-p", "5000:5000"]
	marathon_client.update_app(
		service,
		app,
		instances = instances,
		container = {
			"image" : image_string,
			"options" : options
		}
	)
Example #4
0
def _update_application(client: MarathonClient,
                        app: MarathonApp,
                        definition_path: str,
                        do_backup: bool = False) -> Union[str, bool]:
    """Push ``app`` to Marathon, optionally saving the current definition first.

    When ``do_backup`` is true the app currently registered under ``app.id``
    is written as JSON to a timestamped file under ``./backups`` (the
    directory is created if missing). The update itself is always forced.

    ``definition_path`` is used only in the progress message.

    Returns the backup file path (an empty string when no backup was taken)
    if the final ``wait_for_deployment`` call is truthy, otherwise ``False``.
    """
    backup_path = ''
    if do_backup:
        backups_present = os.path.isdir('./backups')
        if not backups_present:
            os.mkdir('./backups/')
            print('Created backups directory')
        snapshot = client.get_app(app.id).to_json()
        file_stem = mangling.appid_to_filename(app.id)
        stamp = time.strftime("%Y-%m-%d_%H:%M:%S")
        backup_path = './backups/{}_{}.json'.format(file_stem, stamp)
        with open(backup_path, 'w') as out:
            out.write(snapshot)
            print('\nBacked app into: {}'.format(backup_path))

    print('Updating app: {} (from: {})'.format(app.id, definition_path))
    deployment = client.update_app(app.id, app, force=True)
    # TODO: Handle failure

    # First wait: a falsy outcome triggers a restart of the app.
    first_wait_ok = wait_for_deployment(client, deployment)
    if not first_wait_ok:
        client.restart_app(app.id)

    # Second wait on the same deployment decides the return value: the backup
    # path (used to build a rollback order) or False.
    # NOTE(review): waiting twice on the same deployment looks unintended;
    # confirm with wait_for_deployment's contract before changing it.
    if wait_for_deployment(client, deployment):
        return backup_path
    return False
Example #5
0
def update_app_tag(client: MarathonClient, appid: str, new_tag: str):
    """Point the app's docker image at ``new_tag`` and wait for the rollout.

    The current image name is split into registry, image and tag with the
    ``mangling`` helpers, the tag is replaced by ``new_tag``, and the app is
    pushed back with a forced update.
    """
    app = client.get_app(appid)
    current_image = app.container.docker.image
    registry, tagged_image = mangling.split_image_name(current_image)
    bare_image, _old_tag = mangling.split_image_tag(tagged_image)
    app.container.docker.image = mangling.rebuild_image_name(
        registry, bare_image, new_tag)
    wait_for_deployment(client, client.update_app(appid, app, force=True))
Example #6
0
def send_to_marathon(request):
    """Run a Marathon action requested through a POST form and report the outcome.

    Reads ``action`` and ``id`` from ``request.POST`` and dispatches to the
    matching ``MarathonClient`` call (``stop``, ``start``, ``destroy``,
    ``restart``, ``scale``, ``update`` or ``stop-deployment``). An action that
    matches none of these performs no Marathon call and is still reported as
    a success.

    :param request: the incoming request; only ``POST`` is acted upon.
    :return: an ``HttpResponse`` whose body is a JSON object with ``status``
        (``"success"`` or ``"error"``) and ``msg`` keys. Every exception,
        including the ``PermissionDenied`` raised when the user lacks
        ``auth.can_init_app`` for ``destroy``, is turned into an error
        payload instead of being propagated.
    """
    import json  # function-scope import so the block brings its own dependency

    # Bound before the try so the error handler can always reference it.
    action = None
    try:
        if request.method != 'POST':
            # Without this guard ``result`` was never assigned for non-POST
            # requests and the final return raised UnboundLocalError.
            return HttpResponse(json.dumps({
                "status": "error",
                "msg": "only POST requests are supported",
            }))

        action = request.POST.get('action', None)
        app_id = request.POST.get('id', None)
        mc = MarathonClient('http://{}:{}'.format(
            settings.MARATHON['host'], settings.MARATHON['port']))

        if action == 'stop':
            mc.scale_app(app_id, 0, force=True)
        elif action == 'start':
            mc.scale_app(app_id, 1)
        elif action == 'destroy':
            if not request.user.has_perm("auth.can_init_app"):
                raise PermissionDenied
            mc.delete_app(app_id)
        elif action == 'restart':
            mc.restart_app(app_id)
        elif action == 'scale':
            mc.scale_app(app_id, int(request.POST.get('number_instance')))
        elif action == 'update':
            app = mc.get_app(app_id)
            app.cpus = float(request.POST.get('cpus'))
            app.mem = float(request.POST.get('mem'))
            app.container.docker.image = request.POST.get('version')
            mc.update_app(app_id, app)
        elif action == "stop-deployment":
            mc.delete_deployment(app_id)

        payload = {
            "status": "success",
            # ``action`` is client-supplied and echoed back, so escape it the
            # same way the error text is escaped.
            "msg": "%s success" % html.escape(str(action)),
        }
    except Exception as e:
        payload = {
            "status": "error",
            "msg": "%s fail: %s" % (html.escape(str(action)),
                                    html.escape(str(e))),
        }
    # json.dumps handles quotes, backslashes and newlines, which the former
    # %-built string did not, so the body is always valid JSON.
    return HttpResponse(json.dumps(payload))
Example #7
0
def re_deploy(app_name, app_file):
    """Redeploy an already-deployed Marathon app from a JSON definition file.

    :param app_name: Marathon application id to update.
    :param app_file: path of a file holding the app definition as JSON.
    :return: the result of ``MarathonClient.update_app`` when ``_is_deployed``
        reports the app as deployed, otherwise ``None``.
    """
    with open(app_file, 'r') as definition_file:
        app_attr = json.load(definition_file)

    cli = MarathonClient(_addresses())
    # Nothing is created here: an app that is not deployed yet is left alone.
    if not _is_deployed(cli, app_name):
        return None
    return cli.update_app(app_name, models.MarathonApp.from_json(app_attr))
def re_deploy(app_name, app_file):
    """Update a deployed Marathon application with the definition in ``app_file``.

    :param app_name: id of the Marathon application to redeploy.
    :param app_file: path to the JSON file used as the new app definition.
    :return: whatever ``MarathonClient.update_app`` returns if ``_is_deployed``
        is truthy for the app; ``None`` when it is not.
    """
    with open(app_file, 'r') as source:
        definition = json.load(source)

    client = MarathonClient(_addresses())
    already_deployed = _is_deployed(client, app_name)
    if already_deployed:
        new_app = models.MarathonApp.from_json(definition)
        return client.update_app(app_name, new_app)
    return None
Example #9
0
def deploy(app_definition, marathon_url, instances, auth_token, zero, force):
    """Create a new Marathon app and retire older apps of the same service.

    The service name is the last path segment of the new app id up to its
    first ``.`` (naming convention ``/some/group/service_name.uniquevalue``).
    Running apps that share that service name are treated as old versions.

    :param app_definition: path to the Marathon app definition JSON file.
    :param marathon_url: Marathon endpoint passed to ``MarathonClient``.
    :param instances: target instance count, or ``None``. ``None`` becomes 2
        for a first deployment and the old app's instance count for a
        zero-downtime release.
    :param auth_token: token passed to ``MarathonClient``.
    :param zero: ``'Yes'`` selects the zero-downtime (hybrid) release; any
        other value scales the old apps to 0 before scaling the new one.
    :param force: ``'Yes'`` updates the existing app in place when creating
        it fails; any other value aborts the process with exit status 1.
    """
    old_appids = []
    # Connect to Marathon
    print("\nConnecting to Marathon...")
    c = MarathonClient(marathon_url, auth_token=auth_token)
    print("Connected to", marathon_url)

    # Pick up the Marathon App Definition file; the context manager closes
    # the handle instead of leaving it to the garbage collector.
    with open(app_definition) as definition_file:
        app = MarathonApp.from_json(json.load(definition_file))
    new_app_id = app.id
    service_name = new_app_id.split("/")[-1].split(".")[0]

    # Instantiate the new application on DC/OS but don't launch it yet
    # The application definition instances field should be 0 by default
    # If forced, the application will be relaunched even if the ID already exists
    print("\nInstantiating new application on Marathon with", app.instances,
          "instances...")
    try:
        c.create_app(new_app_id, app)
    except Exception as error:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # are not mistaken for a failed create.
        if force == 'Yes':
            print("\nForcing redeploy of the same app id...", new_app_id)
            c.update_app(new_app_id, app, force=True, minimal=True)
            check_deployment(c, new_app_id)
        else:
            # Report the failure and exit non-zero; a bare sys.exit() would
            # signal success to the calling shell or CI job.
            print("Failed to create app", new_app_id, "-", error)
            sys.exit(1)
    print("Created app", new_app_id)

    # List and find currently running apps of the same service
    # This assumes the naming convention (id): /some/group/service_name.uniquevalue
    print("\nFinding any existing apps for service:", service_name)
    for existing_app in c.list_apps():
        existing_service_name = existing_app.id.split("/")[-1].split(".")[0]
        if (service_name == existing_service_name
                and existing_app.instances > 0):
            print("Found up and running application id:", existing_app.id)
            old_appids.append(existing_app.id)

    # If it's the first deployment ever, just launch the desired number of instances
    # Otherwise perform a hybrid release
    # Finally clean up any older app instances running
    if not old_appids:
        if instances is None:
            instances = 2
        print("No current apps found. Launching brand new service with",
              instances, "instances...")
        c.scale_app(new_app_id, instances=instances)
        check_deployment(c, new_app_id)
        check_health(c, new_app_id)

    else:
        old_appids.reverse()
        if zero == 'Yes':
            print("\nStarting zero downtime deployment for...", new_app_id)
            for old_appid in old_appids:
                if instances is None:
                    instances = c.get_app(old_appid).instances
                if (old_appid == '' or old_appid == new_app_id
                        or old_appid == '/' + new_app_id):
                    # The "old" app is the one just (re)deployed: only scale it.
                    print("Scaling existing app_id", new_app_id, "to",
                          instances, "instances...")
                    c.scale_app(new_app_id, instances=instances)
                    check_deployment(c, new_app_id)
                    check_health(c, new_app_id)

                else:
                    print("Target number of total instances:", instances)
                    # Move half of the target (at least one instance) first.
                    delta = int(round(instances * .50))
                    delta = (delta if delta > 0 else 1)

                    scale(c, new_app_id, old_appid, delta)

                    if (c.get_app(new_app_id).instances != instances):
                        print("\nLaunch", instances - delta,
                              "remaining instance(s) of the new version...")
                        c.scale_app(new_app_id, instances=instances)
                        check_deployment(c, new_app_id)
                        check_health(c, new_app_id)
                    if (c.get_app(old_appid).instances > 0):
                        print(
                            "Finish shutting down remaining instances of the old version..."
                        )
                        c.scale_app(old_appid, instances=0)
                        check_deployment(c, old_appid)
        else:
            print("Started deployment with downtime...")
            for old_appid in old_appids:
                c.scale_app(old_appid, instances=0)
                check_deployment(c, old_appid)
            # NOTE(review): unlike the other branches, ``instances`` is not
            # defaulted here, so ``None`` is passed through to scale_app.
            c.scale_app(new_app_id, instances=instances)
            check_deployment(c, new_app_id)
            check_health(c, new_app_id)

    print("\nSUCCESS:\nNew application ID:", new_app_id,
          "\nRunning instances:", instances)
Example #10
0
class MarathonWorkers(object):
    """Run and scale a group of dask workers as one Marathon application.

    The Marathon app is named ``name`` (a generated ``dask-<uuid>`` when not
    given). Scaling requests are submitted to a single-thread executor.
    """

    def __init__(self, scheduler, marathon, name=None, nprocs=1, nthreads=0,
                 docker='daskos/daskathon', volumes=None, **kwargs):
        """Store the configuration and create the Marathon client.

        :param scheduler: object exposing ``address`` and ``worker_info``.
        :param marathon: Marathon endpoint passed to ``MarathonClient``.
        :param name: Marathon app name; generated when falsy.
        :param nprocs: value for the worker's ``--nprocs`` flag.
        :param nthreads: value for the worker's ``--nthreads`` flag.
        :param docker: docker image used for the workers.
        :param volumes: iterable of docker ``volume`` parameter values;
            ``None`` (the default) means no volumes.
        :param kwargs: extra keyword arguments forwarded to ``MarathonApp``;
            a ``mem`` entry is also used to derive ``--memory-limit``.
        """
        self.scheduler = scheduler
        self.executor = ThreadPoolExecutor(1)
        self.client = MarathonClient(marathon)
        self.name = name or 'dask-%s' % uuid.uuid4()
        self.docker = docker
        # A fresh list per instance: a ``[]`` default in the signature would
        # be one shared object stored on every instance created without
        # ``volumes``.
        self.volumes = list(volumes) if volumes is not None else []
        self.nprocs = nprocs
        self.nthreads = nthreads
        self.options = kwargs

    def start(self, nworkers=0):
        """Create or update the Marathon app with ``nworkers`` instances."""
        # address = self.scheduler.address.replace('tcp://', '')
        args = ['dask-worker', self.scheduler.address,
                '--name', '$MESOS_TASK_ID',  # use Mesos task ID as worker name
                '--worker-port', '$PORT_WORKER',
                '--bokeh-port', '$PORT_BOKEH',
                '--nanny-port', '$PORT_NANNY',
                '--nprocs', str(self.nprocs),
                '--nthreads', str(self.nthreads)]

        # Port 0 for every named port definition.
        ports = [{'port': 0,
                  'protocol': 'tcp',
                  'name': port_name}
                 for port_name in ['worker', 'nanny', 'http', 'bokeh']]

        # Health checks are currently disabled; the former definition was:
        # healths = [{'portIndex': i,
        #             'protocol': 'TCP',
        #             'gracePeriodSeconds': 300,
        #             'intervalSeconds': 60,
        #             'timeoutSeconds': 20,
        #             'maxConsecutiveFailures': 3}
        # for i, name in enumerate(['worker', 'nanny', 'http', 'bokeh'])]
        healths = []

        if 'mem' in self.options:
            # Worker memory limit is 80% of the app's ``mem``, scaled by 1e6
            # (presumably MB to bytes; confirm against the dask-worker flag).
            args.extend(['--memory-limit',
                         str(int(self.options['mem'] * 0.8 * 1e6))])

        docker_parameters = [{"key": "volume", "value": v}
                             for v in self.volumes]
        container = MarathonContainer({'image': self.docker,
                                       'forcePullImage': True,
                                       'parameters': docker_parameters})
        command = ' '.join(args)

        app = MarathonApp(instances=nworkers, container=container,
                          port_definitions=ports, cmd=command,
                          health_checks=healths,
                          **self.options)
        self.client.update_app(self.name, app)
        logger.info('Started marathon workers {}'.format(self.name))

    def close(self):
        """Delete the Marathon app (forced)."""
        logger.info('Stopping marathon workers {}'.format(self.name))
        self.client.delete_app(self.name, force=True)

    def scale_up(self, n):
        """Submit a request to scale the app to ``n`` instances."""
        self.executor.submit(self.client.scale_app, self.name,
                             instances=n)

    def scale_down(self, workers):
        """Submit a kill-and-scale request for each worker in ``workers``.

        Each worker key is looked up in ``scheduler.worker_info`` and its
        ``name`` entry is passed to ``kill_task`` as the task to kill.
        """
        for worker in workers:
            self.executor.submit(self.client.kill_task, self.name,
                                 self.scheduler.worker_info[worker]['name'],
                                 scale=True)
Example #11
0
 def update_app_from_json(self, json_data, force):
     """Build a ``MarathonApp`` from ``json_data`` and push it as an update.

     The app's own ``id`` is used as the target id and ``force`` is passed
     through positionally to ``MarathonClient.update_app``, whose result is
     returned.
     """
     parsed_app = MarathonApp.from_json(json_data)
     return MarathonClient.update_app(self, parsed_app.id, parsed_app, force)
Example #12
0
                                verify=False)
    except MarathonError as e:
        logging.error("Failed to connect to Marathon! {}".format(e))
        exit_code = 1
        sys.exit(exit_code)

    logging.info("Deploying application...")
    try:
        app = client.get_app(marathon_app_id)
    except MarathonHttpError:
        response = client.create_app(marathon_app_id, app_definition)
        version = response.version
        depolyment_id = response.deployments[0].id
    else:
        response = client.update_app(marathon_app_id,
                                     app_definition,
                                     force=marathon_force)
        version = response['version']
        deployment_id = response['deploymentId']

    logging.info("New version deployed: {}".format(version))

    if app_definition.instances == 0:
        logging.info(
            "Deactivated application by setting instances to 0, deployment complete."
        )
        exit_code = 0
        sys.exit(exit_code)

    ### Get newly created Mesos task
Example #13
0
def deploy(marathon, name, docker, volume, scheduler_cpus, scheduler_mem,
           adaptive, port, bokeh_port, constraint, maximum_over_capacity,
           minimum_health_capacity, label, uri, jupyter, **kwargs):
    """Deploy a daskathon scheduler (and optionally Jupyter) app on Marathon.

    Builds a ``daskathon run`` command line from the arguments and submits
    it as the ``<name>-scheduler`` Marathon app. When ``jupyter`` is truthy,
    a copy of that app running a Jupyter notebook is submitted as
    ``<name>-jupyter``.

    :param marathon: Marathon endpoint; also appended to the command line.
    :param name: base name; generated as ``daskathon-xxxx`` when falsy.
    :param docker: docker image for the containers.
    :param volume: iterable of docker ``volume`` parameter values.
    :param scheduler_cpus: ``cpus`` of the scheduler app.
    :param scheduler_mem: ``mem`` of the scheduler app.
    :param adaptive: when truthy, ``--adaptive`` is added to the command.
    :param port: scheduler port.
    :param bokeh_port: bokeh port.
    :param constraint: iterable of ``:``-separated constraint strings; at
        most the first three fields of each are used.
    :param maximum_over_capacity: upgrade-strategy value; forwarded on the
        command line only when truthy.
    :param minimum_health_capacity: upgrade-strategy value; forwarded on the
        command line only when truthy.
    :param label: iterable of ``key:value`` strings. Only the first ``:``
        separates key from value, so values may contain colons.
    :param uri: iterable of URIs for the app and the command line.
    :param jupyter: when truthy, also deploy the notebook app.
    :param kwargs: extra options turned into ``--key value`` flags; ``None``
        and empty-string values are skipped.
    """
    name = name or 'daskathon-{}'.format(str(uuid.uuid4())[-4:])

    kwargs['name'] = '{}-workers'.format(name)
    kwargs['docker'] = docker
    kwargs['port'] = port
    kwargs['bokeh_port'] = bokeh_port

    args = [('--{}'.format(k.replace('_', '-')), str(v))
            for k, v in kwargs.items() if v not in (None, '')]

    for c in constraint:
        args.append(('--constraint', c))
    for u in uri:
        args.append(('--uri', u))
    for v in volume:
        args.append(('--volume', v))

    if maximum_over_capacity:
        args.append(('--maximum-over-capacity', str(maximum_over_capacity)))

    if minimum_health_capacity:
        args.append(
            ('--minimum-health-capacity', str(minimum_health_capacity)))

    # Flatten the (flag, value) pairs into one argument list.
    args = list(concat(args))
    if adaptive:
        args.append('--adaptive')

    client = MarathonClient(marathon)
    docker_parameters = [{"key": "volume", "value": v} for v in volume]
    container = MarathonContainer({
        'image': docker,
        'forcePullImage': True,
        'parameters': docker_parameters
    })
    args = ['daskathon', 'run'] + args + [marathon]
    cmd = ' '.join(args)

    healths = [{
        'portIndex': i,
        'protocol': 'TCP'
    } for i, _ in enumerate(['scheduler', 'bokeh'])]

    services = [('scheduler', port), ('bokeh', bokeh_port)]
    ports = [{
        'port': p,
        'protocol': 'tcp',
        'name': service
    } for (service, p) in services]

    constraints = [c.split(':')[:3] for c in constraint]
    # Split on the first colon only: an unbounded split turned a value that
    # itself contains ':' (a URL, for example) into three or more fields and
    # made dict() raise ValueError.
    labels = dict(item.split(':', 1) for item in label)
    upgrade_strategy = {
        'maximum_over_capacity': maximum_over_capacity,
        'minimum_health_capacity': minimum_health_capacity
    }

    scheduler = MarathonApp(instances=1,
                            container=container,
                            cpus=scheduler_cpus,
                            mem=scheduler_mem,
                            task_kill_grace_period_seconds=20,
                            port_definitions=ports,
                            health_checks=healths,
                            constraints=constraints,
                            upgrade_strategy=upgrade_strategy,
                            labels=labels,
                            uris=uri,
                            require_ports=True,
                            cmd=cmd)
    client.update_app('{}-scheduler'.format(name), scheduler)

    if jupyter:
        cmd = ('jupyter notebook --allow-root --no-browser '
               '--NotebookApp.token=\'\' --ip 0.0.0.0 --port $PORT_NOTEBOOK')
        ports = [{'port': 0, 'protocol': 'tcp', 'name': 'notebook'}]
        # Same resources, container and constraints as the scheduler app;
        # only the command and the port definitions differ.
        jupyter = deepcopy(scheduler)
        jupyter.cmd = cmd
        jupyter.port_definitions = ports
        client.update_app('{}-jupyter'.format(name), jupyter)
class MarathonSpawner(Spawner):
    """JupyterHub spawner that runs each user's notebook server as a Marathon app.

    The app id is ``/<app_prefix>/<username>/notebook``. Resource requests
    and the image come from the user's options form, falling back to the
    configured defaults for the image.
    """

    # Default docker image for the single-user server.
    app_image = Unicode("jupyterhub/singleuser:%s" % _jupyterhub_xy,
                        config=True)

    app_prefix = Unicode("jupyter",
                         help=dedent("""
            Prefix for app names. The full app name for a particular
            user will be <prefix>/<username>/notebook.
            """)).tag(config=True)

    marathon_host = Unicode(
        u'', help="Hostname of Marathon server").tag(config=True)

    marathon_constraints = List(
        [],
        help='Constraints to be passed through to Marathon').tag(config=True)

    unreachable_strategy = Any(
        None,
        help='Unreachable strategy to be passed through to Marathon').tag(
            config=True)

    volumes = List([],
                   help=dedent("""
            A list in Marathon REST API format for mounting volumes into the docker container.
            [
                {
                    "containerPath": "/foo",
                    "hostPath": "/bar",
                    "mode": "RW"
                }
            ]

            Note that using the template variable {username} in containerPath,
            hostPath or the name variable in case it's an external drive
            it will be replaced with the current user's name.
            """)).tag(config=True)

    # Upper bounds and default values shown in the options form.
    max_cpu = Float(2, config=True)
    cpu = Float(1, config=True)

    max_mem = Float(4096, config=True)
    mem = Float(1024, config=True)

    max_disk = Float(20000, config=True)
    disk = Float(5000, config=True)

    # The GPU field is only rendered in the form when max_gpu > 0.
    max_gpu = Integer(0, config=True)
    gpu = Integer(0, config=True)

    # Passed to Marathon as the app's ``user``.
    mesos_user = Unicode(None, config=True, allow_none=True)

    autotimeout = Integer(
        None,
        help="Seconds to automatically timeout unused notebook servers",
        config=True,
        allow_none=True)

    hub_ip_connect = Unicode(
        "", help="Public IP address of the hub").tag(config=True)

    @observe('hub_ip_connect')
    def _ip_connect_changed(self, change):
        """Warn that ``hub_ip_connect`` is obsolete on JupyterHub >= 0.8."""
        if jupyterhub.version_info >= (0, 8):
            warnings.warn(
                "MarathonSpawner.hub_ip_connect is no longer needed with JupyterHub 0.8."
                "  Use JupyterHub.hub_connect_ip instead.",
                DeprecationWarning,
            )

    hub_port_connect = Integer(-1,
                               help="Public PORT of the hub").tag(config=True)

    @observe('hub_port_connect')
    def _port_connect_changed(self, change):
        """Warn that ``hub_port_connect`` is obsolete on JupyterHub >= 0.8."""
        if jupyterhub.version_info >= (0, 8):
            warnings.warn(
                "MarathonSpawner.hub_port_connect is no longer needed with JupyterHub 0.8."
                "  Use JupyterHub.hub_connect_port instead.",
                DeprecationWarning,
            )

    format_volume_name = Any(
        help="""Any callable that accepts a string template and a Spawner
        instance as parameters in that order and returns a string.
        """).tag(config=True)

    @default('format_volume_name')
    def _get_default_format_volume_name(self):
        return default_format_volume_name

    # fix default port to 8888, used in the container
    @default('port')
    def _port_default(self):
        return 8888

    # default to listening on all-interfaces in the container
    @default('ip')
    def _ip_default(self):
        return '0.0.0.0'

    # Thread pool shared by all spawner instances, created on first use.
    _executor = None

    @property
    def executor(self):
        """Return the class-wide thread pool, creating it on first access."""
        cls = self.__class__
        if cls._executor is None:
            cls._executor = ThreadPoolExecutor(5)
        return cls._executor

    def __init__(self, *args, **kwargs):
        super(MarathonSpawner, self).__init__(*args, **kwargs)
        self.marathon = MarathonClient(self.marathon_host)
        # Called for its side effect: it initialises ``stored_user_options``.
        self.get_state()

    @property
    def app_id(self):
        """Marathon app id for the current user's notebook server."""
        return '/%s/%s/notebook' % (self.app_prefix, self.user.name)

    def get_state(self):
        """Return the persisted state, including the current user options."""
        state = super(MarathonSpawner, self).get_state()
        state['user_options'] = self.stored_user_options = self.user_options
        return state

    def load_state(self, state):
        """Restore state and remember the user options saved with it."""
        super(MarathonSpawner, self).load_state(state)
        self.stored_user_options = state.get('user_options', {})

    def get_health_checks(self):
        """Return the single TCP health check used for the notebook app."""
        health_checks = []
        health_checks.append(
            MarathonHealthCheck(protocol='TCP',
                                port_index=0,
                                grace_period_seconds=300,
                                interval_seconds=30,
                                timeout_seconds=20,
                                max_consecutive_failures=0))
        return health_checks

    def get_volumes(self):
        """Build Marathon volumes from ``volumes``, expanding name templates.

        Container path, host path and, for external volumes, the ``name``
        entry are each passed through ``format_volume_name``.
        """
        volumes = []
        for v in self.volumes:
            mv = MarathonContainerVolume.from_json(v)
            mv.container_path = self.format_volume_name(
                mv.container_path, self)
            mv.host_path = self.format_volume_name(mv.host_path, self)
            if mv.external and 'name' in mv.external:
                mv.external['name'] = self.format_volume_name(
                    mv.external['name'], self)
            volumes.append(mv)
        return volumes

    def get_constraints(self):
        """Convert ``marathon_constraints`` into ``MarathonConstraint`` objects."""
        constraints = []
        for c in self.marathon_constraints:
            constraints.append(MarathonConstraint.from_json(c))
        return constraints

    def get_ip_and_port(self, app_info):
        """Return ``(ip, port)`` of the app's only task.

        The task's host name is resolved to an IP address and the first of
        its ports is used.
        """
        assert len(app_info.tasks) == 1
        ip = socket.gethostbyname(app_info.tasks[0].host)
        return (ip, app_info.tasks[0].ports[0])

    @run_on_executor
    def get_app_info(self, app_id):
        """Fetch the app with its tasks, or ``None`` if Marathon has no such app."""
        try:
            app = self.marathon.get_app(app_id, embed_tasks=True)
        except NotFoundError:
            self.log.info("The %s application has not been started yet",
                          app_id)
            return None
        else:
            return app

    def _public_hub_api_url(self):
        """Return the hub API URL with the configured public IP/port applied."""
        uri = urlparse(self.hub.api_url)
        port = self.hub_port_connect if self.hub_port_connect > 0 else uri.port
        ip = self.hub_ip_connect if self.hub_ip_connect else uri.hostname
        return urlunparse((uri.scheme, '%s:%s' % (ip, port), uri.path,
                           uri.params, uri.query, uri.fragment))

    def get_args(self):
        """Return the server arguments adjusted for running under Marathon.

        ``--port`` is replaced by ``--port=$PORT0`` and, when
        ``hub_ip_connect`` is set, ``--hub-api-url`` is replaced by the
        public hub URL.
        """
        args = super().get_args()
        if self.hub_ip_connect:
            # JupyterHub 0.7 specifies --hub-api-url
            # on the command-line, which is hard to update
            for idx, arg in enumerate(list(args)):
                if arg.startswith('--hub-api-url='):
                    args.pop(idx)
                    break
            args.append('--hub-api-url=%s' % self._public_hub_api_url())
        for idx, arg in enumerate(list(args)):
            if arg.startswith('--port='):
                args.pop(idx)
                break
        args.append('--port=$PORT0')
        return args

    def options_from_form(self, formdata):
        """Convert submitted form data into the ``user_options`` dict."""
        options = {}
        options['app_image'] = formdata['app_image'][0] or None
        if 'force_pull_image' in formdata:
            options['force_pull_image'] = formdata['force_pull_image'][
                0] == 'on'
        options['cpu'] = float(formdata['cpu'][0])
        options['mem'] = float(formdata['mem'][0])
        options['disk'] = float(formdata['disk'][0])
        if formdata.get('gpu', None):
            options['gpu'] = int(formdata['gpu'][0])
        return options

    @property
    def options_form(self):
        """HTML for the spawn options form, pre-filled from stored options."""
        # The mem input's ``max`` uses %(max_mem)s like the other fields; it
        # previously read "$(max_mem)s", which was emitted literally.
        template = """
        <div class="form-group">
            <label for="app_image">Image <span class="label label-default">Optional</span></label>
            <input id="app_image" class="form-control" name="app_image" type="text" placeholder="e.g. %(default_app_image)s" value="%(app_image)s" />
        </div>
        <div class="checkbox">
            <label for="force_pull_image">
                <input id="force_pull_image" name="force_pull_image" type="checkbox" value="on" />
                Force pull image
            </label>
        </div>
        <div class="form-group">
            <div class="row">
                <div class="col-sm-4">
                    <label for="cpu">CPU</label>
                    <input id="cpu" class="form-control" name="cpu" type="number" step="any" value="%(cpu)s" min="%(min_cpu)s" max="%(max_cpu)s" required />
                </div>
                <div class="col-sm-4">
                    <label for="mem">Mem (MiB)</label>
                    <input id="mem" class="form-control" name="mem" type="number" step="any" value="%(mem)s" min="%(min_mem)s" max="%(max_mem)s" required />
                </div>
                <div class="col-sm-4">
                    <label for="disk">Disk (MiB)</label>
                    <input id="disk" class="form-control" name="disk" type="number" step="any" value="%(disk)s" min="%(min_disk)s" max="%(max_disk)s" required />
                </div>
            </div>
        </div>
        """ % {
            'default_app_image':
            self.app_image,
            'app_image':
            self.stored_user_options.get('app_image', None) or '',
            'min_cpu':
            0.001,
            'max_cpu':
            self.max_cpu,
            'cpu':
            remove_zeros(str(self.stored_user_options.get('cpu', self.cpu))),
            'min_mem':
            32,
            'max_mem':
            self.max_mem,
            'mem':
            remove_zeros(str(self.stored_user_options.get('mem', self.mem))),
            'min_disk':
            1000,
            'max_disk':
            self.max_disk,
            'disk':
            remove_zeros(str(self.stored_user_options.get('disk', self.disk))),
        }
        if self.max_gpu > 0:
            template += """
            <div class="form-group">
                <div class="row">
                    <div class="col-sm-4">
                        <label for="gpu">GPU</label>
                        <input id="gpu" class="form-control" name="gpu" type="number" step="1" value="%(gpu)s" min="%(min_gpu)s" max="%(max_gpu)s" required />
                    </div>
                </div>
            </div>
            """ % {
                'min_gpu': 0,
                'max_gpu': self.max_gpu,
                'gpu': self.stored_user_options.get('gpu', self.gpu),
            }
        return """<div>%s</div>""" % template

    @gen.coroutine
    def start(self):
        """Create or update the user's Marathon app and wait until it is healthy.

        Returns ``(ip, port)`` of the running task. Raises
        ``MarathonSpawnerException`` if the app disappears or ends up with
        zero instances while waiting.
        """
        app_image = self.user_options.get('app_image', None) or self.app_image
        force_pull_image = self.user_options.get('force_pull_image', False)
        self.log.info("starting a Marathon app with image=%s" % app_image)

        container_params = {
            'image': app_image,
            'force_pull_image': force_pull_image
        }
        docker_container = MarathonDockerContainer(**container_params)

        app_container = MarathonContainer(docker=docker_container,
                                          type='MESOS',
                                          volumes=self.get_volumes())

        cpu = self.user_options.get('cpu', None)
        mem = self.user_options.get('mem', None)
        disk = self.user_options.get('disk', None)
        gpu = self.user_options.get('gpu', None)
        self.log.info("resource: (cpu=%s, mem=%s, disk=%s, gpu=%s)" %
                      (cpu, mem, disk, gpu))

        cmd = self.cmd + self.get_args()
        env = self.get_env()

        port_definitions = [PortDefinition(port=0, protocol='tcp')]

        app_request = MarathonApp(
            id=self.app_id,
            cmd=' '.join(
                cmd),  # cmd does not use Docker image's default entrypoint
            env=env,
            cpus=cpu,
            mem=mem,
            disk=disk,
            gpus=gpu,
            user=self.mesos_user,
            container=app_container,
            port_definitions=port_definitions,
            networks=[{
                'mode': 'host'
            }],
            constraints=self.get_constraints(),
            health_checks=self.get_health_checks(),
            unreachable_strategy=self.unreachable_strategy,
            instances=1)

        # Yield to obtain the actual lookup result. get_app_info is wrapped
        # by run_on_executor, so the un-yielded return value was always
        # truthy and the create_app branch below could never run.
        app_info = yield self.get_app_info(self.app_id)
        try:
            if app_info:
                self.marathon.update_app(self.app_id, app_request, force=True)
            else:
                self.marathon.create_app(self.app_id, app_request)
        except Exception as e:
            self.log.error("Failed to create application for %s: %s",
                           self.app_id, e)
            raise e

        # Poll once per second until exactly one task is healthy.
        while True:
            app_info = yield self.get_app_info(self.app_id)
            if app_info is None:
                # Format the message here: exceptions do not interpolate
                # logging-style arguments.
                raise MarathonSpawnerException(
                    "Application %s is lost" % self.app_id)
            elif app_info.instances == 0:
                raise MarathonSpawnerException(
                    "No instance for application %s" % self.app_id)
            elif app_info.tasks_healthy == 1:
                ip, port = self.get_ip_and_port(app_info)
                break
            yield gen.sleep(1)
        return (ip, port)

    @gen.coroutine
    def stop(self, now=False):
        """Scale the app to zero instances.

        Unless ``now`` is true, wait until the app has no pending
        deployments (or no longer exists).
        """
        try:
            self.marathon.update_app(self.app_id,
                                     MarathonApp(instances=0),
                                     force=True)
        except Exception as e:
            self.log.error("Failed to delete application %s", self.app_id)
            raise e
        else:
            if not now:
                while True:
                    app_info = yield self.get_app_info(self.app_id)
                    if app_info is None:
                        # Stopping application is lost, just ignore it!
                        break
                    elif len(app_info.deployments) == 0:
                        # This is the success case.
                        break
                    yield gen.sleep(1)

    @gen.coroutine
    def poll(self):
        """Report the app's status as JupyterHub expects from ``poll``.

        Returns ``None`` while the app is running, otherwise an integer:
        3 when the app is missing, 1 when it is being stopped, 2 when no
        task is healthy, and 0 when it was stopped here for inactivity.
        """
        app_info = yield self.get_app_info(self.app_id)

        if app_info is None:
            self.log.error("Application %s is lost", self.app_id)
            return 3

        for deployment in app_info.deployments:
            for current_action in deployment.current_actions:
                if current_action.action == 'StopApplication':
                    self.log.error("Application %s is shutting down",
                                   self.app_id)
                    return 1

        if app_info.tasks_healthy == 0:
            self.log.error("No healthy instance for application %s",
                           self.app_id)
            return 2

        if self.autotimeout is not None:
            tm_diff = datetime.utcnow() - self.user.last_activity
            # total_seconds() includes whole days; ``.seconds`` is only the
            # sub-day remainder and under-reports long idle periods.
            self.log.debug("Application %s is inactive for %d sec",
                           self.app_id, tm_diff.total_seconds())
            if tm_diff > timedelta(seconds=self.autotimeout):
                self.log.info(
                    "Stopping application %s because it's inactive for more than %d sec",
                    self.app_id, self.autotimeout)
                # Do not yield the result of stop here
                self.stop()
                return 0

        return None
Example #15
0
 def update_app_from_json( self, json_data, force ):
   """Build a ``MarathonApp`` from ``json_data`` and push it as an update.

   The app's own ``id`` is used as the target id and ``force`` is passed
   through positionally to ``MarathonClient.update_app``, whose result is
   returned.
   """
   parsed_app = MarathonApp.from_json(json_data)
   return MarathonClient.update_app(self, parsed_app.id, parsed_app, force)