Exemple #1
0
def create_public(group_name, pod_name, appname):
    """参数同private, 只是不能指定需要的核心数量"""
    data = request.get_json()

    if data.get('raw', ''):
        vstr = consts.RAW_VERSION_PLACEHOLDER
    else:
        vstr = data['version']

    group, pod, application, version = validate_instance(group_name,
            pod_name, appname, vstr)

    networks = Network.get_multi(data.get('networks', []))
    spec_ips = data.get('spec_ips', [])
    ncontainer = int(data['ncontainer'])
    appconfig = version.appconfig
    if not data['entrypoint'] in appconfig.entrypoints:
        current_app.logger.error('Entrypoint not in app.yaml (entry=%s, name=%s, version=%s)',
                data['entrypoint'], appname, version.short_sha)
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Entrypoint %s not in app.yaml' % data['entrypoint'])

    ts, keys = [], []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        hosts = pod.get_free_public_hosts(ncontainer)
        for host in itertools.islice(itertools.cycle(hosts), ncontainer):
            t = _create_task(consts.TASK_CREATE, version, host, 1,
                    {}, 0, networks, spec_ips, data['entrypoint'], data['env'],
                    image=data.get('image', ''))
            if not t:
                continue
            ts.append(t.id)
            keys.append(t.result_key)

    return {'r':0, 'msg': 'ok', 'tasks': ts, 'watch_keys': keys}
Exemple #2
0
def create_public(group_name, pod_name, appname):
    """参数同private, 只是不能指定需要的核心数量"""
    data = request.get_json()

    group, pod, application, version = validate_instance(group_name,
            pod_name, appname, data['version'])

    networks = Network.get_multi(data.get('networks', []))
    ncontainer = int(data['ncontainer'])
    appconfig = version.appconfig
    if not data['entrypoint'] in appconfig.entrypoints:
        raise EruAbortException(code.HTTP_BAD_REQUEST, 'Entrypoint %s not in app.yaml' % data['entrypoint'])

    ts, keys = [], []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        hosts = pod.get_free_public_hosts(ncontainer)
        for host in itertools.islice(itertools.cycle(hosts), ncontainer):
            t = _create_task(code.TASK_CREATE, version, host, 1,
                    {}, 0, networks, data['entrypoint'], data['env'])
            if not t:
                continue
            ts.append(t.id)
            keys.append(t.result_key)

    return {'r':0, 'msg': 'ok', 'tasks': ts, 'watch_keys': keys}
Exemple #3
0
def create_public(group_name, pod_name, appname):
    """
    ncontainer: int container nums
    version: string deploy version
    expose: bool true or false, default true
    """
    data = request.get_json()

    group, pod, application, version = validate_instance(group_name,
            pod_name, appname, data['version'])

    networks = Network.get_multi(data.get('networks', []))
    ncontainer = int(data['ncontainer'])
    appconfig = version.appconfig
    if not data['entrypoint'] in appconfig.entrypoints:
        raise EruAbortException(code.HTTP_BAD_REQUEST, 'Entrypoint %s not in app.yaml' % data['entrypoint'])

    tasks_info = []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        try:
            # 轮询, 尽可能均匀部署
            hosts = pod.get_free_public_hosts(ncontainer)
            for host in itertools.islice(itertools.cycle(hosts), ncontainer):
                tasks_info.append(
                    (version, host, 1, [], networks, data['entrypoint'], data['env'])
                )
        except Exception, e:
            logger.exception(e)
            raise EruAbortException(code.HTTP_BAD_REQUEST, str(e))
Exemple #4
0
def create_private(group_name, pod_name, appname):
    """ncore: 需要的核心数, 可以是小数, 例如1.5个"""
    data = request.get_json()

    if data.get('raw', ''):
        vstr = consts.RAW_VERSION_PLACEHOLDER
    else:
        vstr = data['version']

    group, pod, application, version = validate_instance(group_name,
            pod_name, appname, vstr)

    # TODO check if group has this pod

    core_require = int(float(data['ncore']) * pod.core_share) # 是说一个容器要几个核...
    ncore = core_require / pod.core_share
    nshare = core_require % pod.core_share

    ncontainer = int(data['ncontainer'])
    networks = Network.get_multi(data.get('networks', []))
    spec_ips = data.get('spec_ips', [])
    appconfig = version.appconfig

    # 指定的host, 如果没有则按照编排分配host
    hostname = data.get('hostname', '')
    host = hostname and Host.get_by_name(hostname) or None
    if host and not (host.group_id == group.id and host.pod_id == pod.id):
        current_app.logger.error('Host must belong to pod/group (hostname=%s, pod=%s, group=%s)',
                host, pod_name, group_name)
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Host must belong to this pod and group')

    if not data['entrypoint'] in appconfig.entrypoints:
        current_app.logger.error('Entrypoint not in app.yaml (entry=%s, name=%s, version=%s)',
                data['entrypoint'], appname, version.short_sha)
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Entrypoint %s not in app.yaml' % data['entrypoint'])

    ts, keys = [], []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        host_cores = group.get_free_cores(pod, ncontainer, ncore, nshare, spec_host=host)
        if not host_cores:
            current_app.logger.error('Not enough cores (name=%s, version=%s, ncore=%s)',
                    appname, version.short_sha, data['ncore'])
            raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Not enough core resources')

        for (host, container_count), cores in host_cores.iteritems():
            t = _create_task(consts.TASK_CREATE, version, host, container_count,
                    cores, nshare, networks, spec_ips, data['entrypoint'], data['env'],
                    image=data.get('image', ''))
            if not t:
                continue

            host.occupy_cores(cores, nshare)
            ts.append(t.id)
            keys.append(t.result_key)

    return {'r': 0, 'msg': 'ok', 'tasks': ts, 'watch_keys': keys}
Exemple #5
0
def create_public(group_name, pod_name, appname):
    data = request.get_json()
    vstr = data['version']
    group, pod, _, version = validate_instance(group_name, pod_name, appname, vstr)

    ports = data.get('ports', [])
    args = data.get('args', [])

    callback_url = data.get('callback_url', '')
    if callback_url and not is_strict_url(callback_url):
        abort(400, 'callback_url must starts with http:// or https://')

    networks = Network.get_multi(data.get('networks', []))
    spec_ips = data.get('spec_ips', [])
    appconfig = version.appconfig

    ncontainer = int(data['ncontainer'])
    if not ncontainer:
        abort(400, 'ncontainer must be > 0')

    entrypoint = data['entrypoint']
    if entrypoint not in appconfig.entrypoints:
        abort(400, 'Entrypoint %s not in app.yaml' % entrypoint)

    ts, keys = [], []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        hosts = pod.get_free_public_hosts(ncontainer)
        for host in itertools.islice(itertools.cycle(hosts), ncontainer):
            t = _create_task(
                version,
                host,
                1,
                {},
                0,
                networks,
                ports,
                args,
                spec_ips,
                data['entrypoint'],
                data['env'],
                image=data.get('image', ''),
                callback_url=callback_url,
            )
            if not t:
                continue
            ts.append(t.id)
            keys.append(t.result_key)

    return {'r':0, 'msg': 'ok', 'tasks': ts, 'watch_keys': keys}
Exemple #6
0
def create_public(group_name, pod_name, appname):
    """参数同private, 只是不能指定需要的核心数量"""
    data = request.get_json()

    if data.get("raw", ""):
        vstr = consts.RAW_VERSION_PLACEHOLDER
    else:
        vstr = data["version"]

    group, pod, application, version = validate_instance(group_name, pod_name, appname, vstr)

    networks = Network.get_multi(data.get("networks", []))
    spec_ips = data.get("spec_ips", [])
    ncontainer = int(data["ncontainer"])
    appconfig = version.appconfig
    if not data["entrypoint"] in appconfig.entrypoints:
        current_app.logger.error(
            "Entrypoint not in app.yaml (entry=%s, name=%s, version=%s)", data["entrypoint"], appname, version.short_sha
        )
        raise EruAbortException(consts.HTTP_BAD_REQUEST, "Entrypoint %s not in app.yaml" % data["entrypoint"])

    ts, keys = [], []
    with rds.lock("%s:%s" % (group_name, pod_name)):
        hosts = pod.get_free_public_hosts(ncontainer)
        for host in itertools.islice(itertools.cycle(hosts), ncontainer):
            t = _create_task(
                consts.TASK_CREATE,
                version,
                host,
                1,
                {},
                0,
                networks,
                spec_ips,
                data["entrypoint"],
                data["env"],
                image=data.get("image", ""),
            )
            if not t:
                continue
            ts.append(t.id)
            keys.append(t.result_key)

    return {"r": 0, "msg": "ok", "tasks": ts, "watch_keys": keys}
Exemple #7
0
def create_private(group_name, pod_name, appname):
    """
    ncore: int cpu num per container -1 means share
    ncontainer: int container nums
    version: string deploy version
    expose: bool true or false, default true
    """
    data = request.get_json()
    group, pod, application, version = validate_instance(group_name,
            pod_name, appname, data['version'])

    # TODO check if group has this pod

    ncore = int(data['ncore']) # 是说一个容器要几个核...
    ncontainer = int(data['ncontainer'])
    networks = Network.get_multi(data.get('networks', []))
    appconfig = version.appconfig

    if not data['entrypoint'] in appconfig.entrypoints:
        raise EruAbortException(code.HTTP_BAD_REQUEST, 'Entrypoint %s not in app.yaml' % data['entrypoint'])

    tasks_info = []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        # 不够了
        if ncore > 0 and group.get_max_containers(pod, ncore) < ncontainer:
            raise EruAbortException(code.HTTP_BAD_REQUEST, 'Not enough cores')

        try:
            host_cores = group.get_free_cores(pod, ncontainer, ncore)
            # 这个pod都不够host了
            if not host_cores:
                raise EruAbortException(code.HTTP_BAD_REQUEST, 'Not enough cores')

            for (host, container_count), cores in host_cores.iteritems():
                tasks_info.append(
                    (version, host, container_count, cores, networks, data['entrypoint'], data['env'])
                )
                host.occupy_cores(cores)
        except Exception, e:
            logger.exception(e)
            raise EruAbortException(code.HTTP_BAD_REQUEST, str(e))
Exemple #8
0
def create_containers_with_macvlan(task_id, ncontainer, core_ids, network_ids):
    """
    执行task_id的任务. 部署ncontainer个容器, 占用core_ids这些核, 绑定到networks这些子网
    """
    task = Task.get(task_id)
    if not task:
        return

    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    used_cores = Core.get_multi(core_ids)

    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id

    cids = []

    for cores in more_itertools.chunked(used_cores, len(core_ids)/ncontainer):
        try:
            cid, cname = dockerjob.create_one_container(host, version,
                    entrypoint, env, cores)
        except:
            host.release_cores(cores)
            continue

        ips = [n.acquire_ip() for n in networks]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            ident_id = cname.split('_')[-1]
            values = [str(task_id), cid, ident_id] + ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, '|'.join(values))

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

        else:
            logger.info('Creating container with cid %s and ips %s' % (cid, ips))
            c = Container.create(cid, host, version, cname, entrypoint, cores, env)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        logger.info('Cleaning failed container with cid %s' % cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores)
        [ip.release() for ip in ips]

    publish_to_service_discovery(version.name)
    task.finish_with_result(code.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
Exemple #9
0
def create_containers_with_macvlan_public(task_id, ncontainer, nshare, network_ids, spec_ips=None):
    """
    执行task_id的任务. 部署ncontainer个容器, 绑定到networks这些子网
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return

    if spec_ips is None:
        spec_ips = []

    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    # use raw
    image = task.props['image']
    cpu_shares = 1024

    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id

    cids = []

    for _ in range(ncontainer):
        try:
            cid, cname = dockerjob.create_one_container(host, version,
                entrypoint, env, cores=None, cpu_shares=cpu_shares, image=image)
        except Exception as e:
            print e # 同上
            continue

        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            ident_id = cname.split('_')[-1]
            values = [str(task_id), cid, ident_id] + ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, '|'.join(values))

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

        else:
            current_flask.logger.info('Creating container (cid=%s, ips=%s)', cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint, {}, env, nshare)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        current_flask.logger.info('Cleaning failed container (cid=%s)', cid)
        dockerjob.remove_container_by_cid([cid], host)
        [ip.release() for ip in ips]
        # 失败了就得清理掉这个key
        rds.delete(feedback_key)

    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
    current_flask.logger.info('Task<id=%s>: Done', task_id)
Exemple #10
0
def create_containers_with_macvlan(task_id, ncontainer, nshare, cores, network_ids, spec_ips=None):
    """
    执行task_id的任务. 部署ncontainer个容器, 占用*_core_ids这些核, 绑定到networks这些子网
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return

    if spec_ips is None:
        spec_ips = []

    need_network = bool(network_ids)
    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    ports = task.props['ports']
    args = task.props['args']
    # use raw
    route = task.props['route']
    image = task.props['image']
    callback_url = task.props['callback_url']
    cpu_shares = int(float(nshare) / host.pod.core_share * 1024) if nshare else 1024

    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    pub_agent_route_key = 'eru:agent:%s:route' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id

    cids = []

    for fcores, pcores in _iter_cores(cores, ncontainer):
        cores_for_one_container = {'full': fcores, 'part': pcores}
        try:
            cid, cname = dockerjob.create_one_container(host, version,
                entrypoint, env, fcores+pcores, ports=ports, args=args,
                cpu_shares=cpu_shares, image=image, need_network=need_network)
        except Exception as e:
            # 写给celery日志看
            print e
            host.release_cores(cores_for_one_container, nshare)
            continue

        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            if ERU_AGENT_API == 'pubsub':
                values = [str(task_id), cid] + ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
                rds.publish(pub_agent_vlan_key, '|'.join(values))
            elif ERU_AGENT_API == 'http':
                agent = get_agent(host)
                ip_list = [(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
                agent.add_container_vlan(cid, str(task_id), ip_list)

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

            if route:
                rds.publish(pub_agent_route_key, '%s|%s' % (cid, route))

        else:
            current_flask.logger.info('Creating container (cid=%s, ips=%s)', cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint,
                    cores_for_one_container, env, nshare, callback_url)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(c)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        current_flask.logger.info('Cleaning failed container (cid=%s)', cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores_for_one_container, nshare)
        [ip.release() for ip in ips]
        # 失败了就得清理掉这个key
        rds.delete(feedback_key)

    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()

    # 有IO, 丢最后面算了
    falcon_all_graphs(version)
    falcon_all_alarms(version)

    current_flask.logger.info('Task<id=%s>: Done', task_id)
Exemple #11
0
def create_private(group_name, pod_name, appname):
    data = request.get_json()
    vstr = data['version']
    group, pod, _, version = validate_instance(group_name, pod_name, appname, vstr)

    # TODO check if group has this pod
    ncore, nshare = pod.get_core_allocation(float(data['ncore']))
    ports = data.get('ports', [])
    args = data.get('args', [])
    strategy = data.get('strategy', 'average')

    callback_url = data.get('callback_url', '')
    if callback_url and not is_strict_url(callback_url):
        abort(400, 'callback_url must start with http:// or https://')

    ncontainer = int(data['ncontainer'])
    if not ncontainer:
        abort(400, 'ncontainer must be > 0')

    networks = Network.get_multi(data.get('networks', []))
    spec_ips = data.get('spec_ips', [])
    appconfig = version.appconfig

    entrypoint = data['entrypoint']
    if entrypoint not in appconfig.entrypoints:
        abort(400, 'Entrypoint %s not in app.yaml' % entrypoint)

    hostname = data.get('hostname', '')
    host = hostname and Host.get_by_name(hostname) or None
    if host and not (host.group_id == group.id and host.pod_id == pod.id):
        abort(400, 'Host must belong to this pod and group')

    ts, keys = [], []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        host_cores = _get_strategy(strategy)(group, pod, ncontainer, ncore, nshare, host)
        if not host_cores:
            abort(400, 'Not enough core resources')

        for (host, container_count), cores in host_cores.iteritems():
            t = _create_task(
                version,
                host,
                container_count,
                cores,
                nshare,
                networks,
                ports,
                args,
                spec_ips,
                entrypoint,
                data['env'],
                image=data.get('image', ''),
                callback_url=callback_url,
            )
            if not t:
                continue

            host.occupy_cores(cores, nshare)
            ts.append(t.id)
            keys.append(t.result_key)

    return {'r': 0, 'msg': 'ok', 'tasks': ts, 'watch_keys': keys}
Exemple #12
0
def create_private(group_name, pod_name, appname):
    """ncore: 需要的核心数, 可以是小数, 例如1.5个"""
    data = request.get_json()

    if data.get("raw", ""):
        vstr = consts.RAW_VERSION_PLACEHOLDER
    else:
        vstr = data["version"]

    group, pod, application, version = validate_instance(group_name, pod_name, appname, vstr)

    # TODO check if group has this pod

    core_require = int(float(data["ncore"]) * pod.core_share)  # 是说一个容器要几个核...
    ncore = core_require / pod.core_share
    nshare = core_require % pod.core_share

    ncontainer = int(data["ncontainer"])
    networks = Network.get_multi(data.get("networks", []))
    spec_ips = data.get("spec_ips", [])
    appconfig = version.appconfig

    # 指定的host, 如果没有则按照编排分配host
    hostname = data.get("hostname", "")
    host = hostname and Host.get_by_name(hostname) or None
    if host and not (host.group_id == group.id and host.pod_id == pod.id):
        current_app.logger.error(
            "Host must belong to pod/group (hostname=%s, pod=%s, group=%s)", host, pod_name, group_name
        )
        raise EruAbortException(consts.HTTP_BAD_REQUEST, "Host must belong to this pod and group")

    if not data["entrypoint"] in appconfig.entrypoints:
        current_app.logger.error(
            "Entrypoint not in app.yaml (entry=%s, name=%s, version=%s)", data["entrypoint"], appname, version.short_sha
        )
        raise EruAbortException(consts.HTTP_BAD_REQUEST, "Entrypoint %s not in app.yaml" % data["entrypoint"])

    ts, keys = [], []
    with rds.lock("%s:%s" % (group_name, pod_name)):
        host_cores = group.get_free_cores(pod, ncontainer, ncore, nshare, spec_host=host)
        if not host_cores:
            current_app.logger.error(
                "Not enough cores (name=%s, version=%s, ncore=%s)", appname, version.short_sha, data["ncore"]
            )
            raise EruAbortException(consts.HTTP_BAD_REQUEST, "Not enough core resources")

        for (host, container_count), cores in host_cores.iteritems():
            t = _create_task(
                consts.TASK_CREATE,
                version,
                host,
                container_count,
                cores,
                nshare,
                networks,
                spec_ips,
                data["entrypoint"],
                data["env"],
                image=data.get("image", ""),
            )
            if not t:
                continue

            host.occupy_cores(cores, nshare)
            ts.append(t.id)
            keys.append(t.result_key)

    return {"r": 0, "msg": "ok", "tasks": ts, "watch_keys": keys}
Exemple #13
0
def create_containers_with_macvlan(task_id, ncontainer, nshare, cores, network_ids, spec_ips=None):
    """
    执行task_id的任务. 部署ncontainer个容器, 占用*_core_ids这些核, 绑定到networks这些子网
    """
    # TODO support part core
    current_flask.logger.info("Task<id=%s>: Started", task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error("Task (id=%s) not found, quit", task_id)
        return

    if spec_ips is None:
        spec_ips = []

    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props["entrypoint"]
    env = task.props["env"]
    # use raw
    image = task.props["image"]
    cpu_shares = int(float(nshare) / host.pod.core_share * 1024) if nshare else 1024

    pub_agent_vlan_key = "eru:agent:%s:vlan" % host.name
    feedback_key = "eru:agent:%s:feedback" % task_id

    cids = []

    full_cores, part_cores = cores.get("full", []), cores.get("part", [])
    for fcores, pcores in izip_longest(
        chunked(full_cores, len(full_cores) / ncontainer),
        chunked(part_cores, len(part_cores) / ncontainer),
        fillvalue=[],
    ):
        cores_for_one_container = {"full": fcores, "part": pcores}
        try:
            cid, cname = dockerjob.create_one_container(
                host, version, entrypoint, env, fcores + pcores, cpu_shares, image=image
            )
        except:
            host.release_cores(cores_for_one_container, nshare)
            continue

        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            ident_id = cname.split("_")[-1]
            values = [str(task_id), cid, ident_id] + ["{0}:{1}".format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, "|".join(values))

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split("|")
            if succ == "0":
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

        else:
            current_flask.logger.info("Creating container (cid=%s, ips=%s)", cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint, cores_for_one_container, env, nshare)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        current_flask.logger.info("Cleaning failed container (cid=%s)", cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores_for_one_container, nshare)
        [ip.release() for ip in ips]
        # 失败了就得清理掉这个key
        rds.delete(feedback_key)

    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
    current_flask.logger.info("Task<id=%s>: Done", task_id)
Exemple #14
0
def create_private(group_name, pod_name, appname):
    """ncore: 需要的核心数, 可以是小数, 例如1.5个"""
    data = request.get_json()

    vstr = data['version']

    group, pod, application, version = validate_instance(group_name,
            pod_name, appname, vstr)

    # TODO check if group has this pod

    core_require = int(float(data['ncore']) * pod.core_share) # 是说一个容器要几个核...
    ncore = core_require / pod.core_share
    nshare = core_require % pod.core_share

    ports = data.get('ports', [])
    args = data.get('args', [])

    ncontainer = int(data['ncontainer'])
    networks = Network.get_multi(data.get('networks', []))
    spec_ips = data.get('spec_ips', [])
    entrypoint = data['entrypoint']
    appconfig = version.appconfig

    strategy = data.get('strategy', 'average')

    # 指定的host, 如果没有则按照编排分配host
    hostname = data.get('hostname', '')
    host = hostname and Host.get_by_name(hostname) or None
    if host and not (host.group_id == group.id and host.pod_id == pod.id):
        current_app.logger.error('Host must belong to pod/group (hostname=%s, pod=%s, group=%s)',
                host, pod_name, group_name)
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Host must belong to this pod and group')

    if not entrypoint in appconfig.entrypoints:
        current_app.logger.error('Entrypoint not in app.yaml (entry=%s, name=%s, version=%s)',
                entrypoint, appname, version.short_sha)
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Entrypoint %s not in app.yaml' % entrypoint)

    route = appconfig.entrypoints[entrypoint].get('network_route', '')

    ts, keys = [], []
    with rds.lock('%s:%s' % (group_name, pod_name)):
        if strategy == 'average':
            host_cores = average_schedule(group, pod, ncontainer, ncore, nshare, spec_host=host)
        elif strategy == 'centralized':
            host_cores = centralized_schedule(group, pod, ncontainer, ncore, nshare, spec_host=host)
        else:
            raise EruAbortException(consts.HTTP_BAD_REQUEST, 'strategy %s not supported' % strategy)

        if not host_cores:
            current_app.logger.error('Not enough cores (name=%s, version=%s, ncore=%s)',
                    appname, version.short_sha, data['ncore'])
            raise EruAbortException(consts.HTTP_BAD_REQUEST, 'Not enough core resources')

        for (host, container_count), cores in host_cores.iteritems():
            t = _create_task(
                version,
                host,
                container_count,
                cores,
                nshare,
                networks,
                ports,
                args,
                spec_ips,
                route,
                data['entrypoint'],
                data['env'],
                image=data.get('image', ''),
            )
            if not t:
                continue

            host.occupy_cores(cores, nshare)
            ts.append(t.id)
            keys.append(t.result_key)

    return {'r': 0, 'msg': 'ok', 'tasks': ts, 'watch_keys': keys}