Exemple #1
0
def rm_containers():
    cids = request.get_json()['cids']

    if not all(len(cid) >= 7 for cid in cids):
        abort(400, 'must given at least 7 chars for container_id')

    version_dict = {}
    for cid in cids:
        container = Container.get_by_container_id(cid)
        if not container:
            continue
        version_dict.setdefault((container.version, container.host), []).append(container)

    ts, watch_keys = [], []
    for (version, host), containers in version_dict.iteritems():
        cids = [c.id for c in containers]
        task = Task.create(consts.TASK_REMOVE, version, host, {'container_ids': cids})

        all_host_cids = [c.id for c in Container.get_multi_by_host(host) if c and c.version_id == version.id]
        need_to_delete_image = set(cids) == set(all_host_cids)

        remove_containers.apply_async(
            args=(task.id, cids, need_to_delete_image),
            task_id='task:%d' % task.id
        )
        ts.append(task.id)
        watch_keys.append(task.result_key)
    return {'r': 0, 'msg': 'ok', 'tasks': ts, 'watch_keys': watch_keys}
Exemple #2
0
def rm_containers():
    cids = request.get_json()['cids']

    if not all(len(cid) >= 7 for cid in cids):
        abort(400, 'must given at least 7 chars for container_id')

    version_dict = {}
    for cid in cids:
        container = Container.get_by_container_id(cid)
        if not container:
            continue
        version_dict.setdefault((container.version, container.host), []).append(container)

    task_ids, watch_keys = [], []
    for (version, host), containers in version_dict.iteritems():
        cids = [c.id for c in containers]
        task = Task.create(TASK_REMOVE, version, host, {'container_ids': cids})

        all_host_cids = [c.id for c in Container.get_multi_by_host(host) if c and c.version_id == version.id]
        need_to_delete_image = set(cids) == set(all_host_cids)

        remove_containers.apply_async(
            args=(task.id, cids, need_to_delete_image),
            task_id='task:%d' % task.id
        )
        task_ids.append(task.id)
        watch_keys.append(task.result_key)
    return {'tasks': task_ids, 'watch_keys': watch_keys}
Exemple #3
0
def cure_container(cid):
    c = Container.get_by_container_id(cid)
    if c and not c.is_alive:
        rebind_container_ip(c)
        c.cure()
        current_app.logger.info('Cure container (container_id=%s)', cid[:7])
    return {'r': 0, 'msg': consts.OK}
Exemple #4
0
def stop_container(cid):
    c = Container.get_by_container_id(cid)
    if c:
        c.kill()
        dockerjob.stop_containers([c,], c.host)
        current_app.logger.info('Stop container (container_id=%s)', cid[:7])
    return {'r': 0, 'msg': consts.OK}
Exemple #5
0
def remove_containers(task_id, cids, rmi=False):
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return

    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    container_ids = [c.container_id for c in containers]
    host = task.host
    try:
        flags = {'eru:agent:%s:container:flag' % cid: 1 for cid in container_ids}
        rds.mset(**flags)
        for c in containers:
            remove_container_backends(c)
            current_flask.logger.info('Task<id=%s>: Container (cid=%s) backends removed',
                    task_id, c.container_id[:7])
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)

        dockerjob.remove_host_containers(containers, host)
        current_flask.logger.info('Task<id=%s>: Containers (cids=%s) removed', task_id, cids)
        if rmi:
            dockerjob.remove_image(task.version, host)
    except Exception, e:
        task.finish_with_result(consts.TASK_FAILED)
        notifier.pub_fail()
        current_flask.logger.error('Task<id=%s>: Exception (e=%s)', task_id, e)
Exemple #6
0
def fix_core(host):
    containers = Container.get_multi_by_host(host)

    # 没有的话, 直接销毁重建
    if not containers:
        rds.delete(host._cores_key)
        _create_cores_on_host(host, host.ncore)
        return

    data = {str(i): host.core_share for i in xrange(host.ncore)}

    for c in containers:
        cores = c.cores
        nshare = int(cores.get("nshare", "0"))
        for e in cores.get("full", []):
            e.remain = host.core_share
            data.pop(e.label)
        for s in cores.get("part", []):
            s.remain = host.core_share - nshare
            data[s.label] -= nshare
        c.cores = cores
    rds.delete(host._cores_key)
    if data:
        rds.zadd(host._cores_key, **data)
    print "done", host
Exemple #7
0
def test_container_release_cores(test_db):
    a = App.get_or_create('app', 'http://git.hunantv.com/group/app.git')
    v = a.add_version(random_sha1())
    p = Pod.create('pod', 'pod', 10, -1)
    host = Host.create(p, random_ipv4(), random_string(), random_uuid(), 200, 0)

    for core in host.cores:
        assert core.host_id == host.id
        assert core.remain == 10

    containers = []
    
    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10), chunked(cores[100:], 10)):
        used_cores = {'full': fcores, 'part': pcores}
        host.occupy_cores(used_cores, 5)
        c = Container.create(random_sha1(), host, v, random_string(), 'entrypoint', used_cores, 'env', nshare=5)
        containers.append(c)

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10), chunked(cores[100:], 10)):
        for core in fcores:
            assert core.remain == 0
        for core in pcores:
            assert core.remain == 5

    for c in containers:
        c.delete()

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10), chunked(cores[100:], 10)):
        for core in fcores:
            assert core.remain == 10
        for core in pcores:
            assert core.remain == 10
Exemple #8
0
def container_log(cid):
    stderr = request.args.get('stderr', type=int, default=0)
    stdout = request.args.get('stdout', type=int, default=0)
    tail = request.args.get('tail', type=int, default=10)

    # docker client's argument
    if tail == 0:
        tail = 'all'

    ws = request.environ['wsgi.websocket']
    container = Container.get_by_container_id(cid)
    if not container:
        ws.close()
        _log.info('Container %s not found, close websocket' % cid)
        return 'websocket closed'
    try:
        client = get_docker_client(container.host.addr)
        for line in client.logs(cid,
                                stream=True,
                                stderr=bool(stderr),
                                stdout=bool(stdout),
                                tail=tail):
            ws.send(line)
    except geventwebsocket.WebSocketError, e:
        _log.exception(e)
Exemple #9
0
def start_container(cid):
    c = Container.get_by_container_id(cid)
    if c:
        c.cure()
        dockerjob.start_containers([c, ], c.host)
        rebind_container_ip(c)
    return {'r': 0, 'msg': code.OK}
Exemple #10
0
def start_container(cid):
    c = Container.get_by_container_id(cid)
    if c and not c.is_alive:
        c.cure()
        dockerjob.start_containers([c, ], c.host)
        rebind_container_ip(c)
        current_app.logger.info('Start container (container_id=%s)', cid[:7])
    return {'r': 0, 'msg': consts.OK}
Exemple #11
0
def _clean_failed_containers(cid):
    # 清理掉失败的容器, 释放核, 释放ip
    _log.info('Cleaning failed container (cid=%s)', cid)
    container = Container.get_by_container_id(cid)
    if not container:
        return

    dockerjob.remove_container_by_cid([cid], container.host)
    container.delete()
Exemple #12
0
def _clean_failed_containers(cid):
    # 清理掉失败的容器, 释放核, 释放ip
    _log.info('Cleaning failed container (cid=%s)', cid)
    container = Container.get_by_container_id(cid)
    if not container:
        return

    dockerjob.remove_container_by_cid([cid], container.host)
    container.delete()
Exemple #13
0
def remove_containers(task_id, cids, rmi=False):
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return

    _log.info('Task<id=%s>: Start on host %s', task_id, task.host.ip)
    notifier = TaskNotifier(task)

    containers = Container.get_multi(cids)
    if not containers:
        _log.error('Task (id=%s) no container found, quit')
        return

    host = containers[0].host

    for c in containers:
        c.in_removal = 1

    container_ids = [c.container_id for c in containers if c]
    try:
        set_flag_for_agent(container_ids)
        for c in containers:
            remove_container_backends(c)
            _log.info('Task<id=%s>: Container (cid=%s) backends removed',
                      task_id, c.short_id)

        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)

        time.sleep(3)

        dockerjob.remove_host_containers(containers, host)
        _log.info('Task<id=%s>: Containers (cids=%s) removed', task_id, cids)

        if rmi:
            try:
                dockerjob.remove_image(task.version, host)
            except Exception as e:
                _log.error('Task<id=%s>, fail to remove image', task_id, e)
    except Exception as e:
        task.finish(consts.TASK_FAILED)
        task.reason = str(e.message)
        notifier.pub_fail()
        _log.error('Task<id=%s> exception', task_id)
        _log.exception(e)
    else:
        for c in containers:
            c.delete()
        task.finish(consts.TASK_SUCCESS)
        task.reason = 'ok'
        notifier.pub_success()
        remove_container_for_agent(host, container_ids)
        remove_flag_for_agent(container_ids)
        _log.info('Task<id=%s>: Done', task_id)
Exemple #14
0
def remove_containers(task_id, cids, rmi=False):
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return

    _log.info('Task<id=%s>: Start on host %s', task_id, task.host.ip)
    notifier = TaskNotifier(task)

    containers = Container.get_multi(cids)
    if not containers:
        _log.error('Task (id=%s) no container found, quit')
        return

    host = containers[0].host

    for c in containers:
        c.in_removal = 1

    container_ids = [c.container_id for c in containers if c]
    try:
        set_flag_for_agent(container_ids)
        for c in containers:
            remove_container_backends(c)
            _log.info('Task<id=%s>: Container (cid=%s) backends removed', task_id, c.short_id)

        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)

        time.sleep(3)

        dockerjob.remove_host_containers(containers, host)
        _log.info('Task<id=%s>: Containers (cids=%s) removed', task_id, cids)

        if rmi:
            try:
                dockerjob.remove_image(task.version, host)
            except Exception as e:
                _log.error('Task<id=%s>, fail to remove image', task_id, e)
    except Exception as e:
        task.finish(consts.TASK_FAILED)
        task.reason = str(e.message)
        notifier.pub_fail()
        _log.error('Task<id=%s> exception', task_id)
        _log.exception(e)
    else:
        for c in containers:
            c.delete()
        task.finish(consts.TASK_SUCCESS)
        task.reason = 'ok'
        notifier.pub_success()
        remove_container_for_agent(host, container_ids)
        remove_flag_for_agent(container_ids)
        _log.info('Task<id=%s>: Done', task_id)
Exemple #15
0
def kill_container(cid):
    c = Container.get_by_container_id(cid)
    if c:
        c.kill()
        key = consts.ERU_AGENT_DIE_REASON % c.container_id
        r = rds.get(key)
        rds.delete(key)
        if r is not None:
            c.set_props({'oom': 1})
        current_app.logger.info('Kill container (container_id=%s)', cid[:7])
    return {'r': 0, 'msg': consts.OK}
Exemple #16
0
def test_container_transform(test_db):
    a = App.get_or_create('app', 'http://git.hunantv.com/group/app.git', '')
    assert a is not None

    v = a.add_version(random_sha1())
    v2 = a.add_version(random_sha1())
    assert v is not None
    assert v.app.id == a.id
    assert v.name == a.name
    assert len(v.containers.all()) == 0
    assert len(v.tasks.all()) == 0

    g = Group.create('group', 'group')
    p = Pod.create('pod', 'pod')
    assert p.assigned_to_group(g)
    hosts = [Host.create(p, random_ipv4(), random_string(prefix='host'),
        random_uuid(), 4, 4096) for i in range(6)]

    for host in hosts[:3]:
        host.assigned_to_group(g)

    assert g.get_max_containers(p, 3, 0) == 3
    host_cores = g.get_free_cores(p, 3, 3, 0)
    assert len(host_cores) == 3

    containers = []
    for (host, count), cores in host_cores.iteritems():
        cores_per_container = len(cores) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {'full': cores['full'][i*cores_per_container:(i+1)*cores_per_container]}
            c = Container.create(cid, host, v, random_string(), 'entrypoint', used_cores, 'env')
            assert c is not None
            containers.append(c)
        host.occupy_cores(cores, 0)

    for host in g.private_hosts.all():
        assert len(host.get_free_cores()[0]) == 1
        assert len(host.containers.all()) == 1
        assert host.count == 1

    assert len(containers) == 3
    assert len(v.containers.all()) == 3

    cids = [c.container_id for c in containers]
    for c in containers:
        host = c.host
        cid = c.container_id
        c.transform(v2, random_sha1(), random_string())
        assert c.container_id != cid

    new_cids = [c.container_id for c in containers]
    assert new_cids != cids
Exemple #17
0
def remove_containers(task_id, cids, rmi=False):
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return

    current_flask.logger.info('Task<id=%s>: Start on host %s' % (task_id, task.host.ip))
    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    container_ids = [c.container_id for c in containers if c]
    host = task.host
    version = task.version
    try:
        # flag, don't report these
        flags = {'eru:agent:%s:container:flag' % cid: 1 for cid in container_ids}
        rds.mset(**flags)
        for c in containers:
            remove_container_backends(c)
            current_flask.logger.info('Task<id=%s>: Container (cid=%s) backends removed',
                    task_id, c.container_id[:7])
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)

        time.sleep(3)

        dockerjob.remove_host_containers(containers, host)
        current_flask.logger.info('Task<id=%s>: Containers (cids=%s) removed', task_id, cids)
        if rmi:
            try:
                dockerjob.remove_image(task.version, host)
            except Exception as e:
                current_flask.logger.error('Task<id=%s>: Exception (e=%s), fail to remove image', task_id, e)
    except Exception as e:
        task.finish_with_result(consts.TASK_FAILED)
        notifier.pub_fail()
        current_flask.logger.error('Task<id=%s>: Exception (e=%s)', task_id, e)
    else:
        for c in containers:
            c.delete()
        task.finish_with_result(consts.TASK_SUCCESS)
        notifier.pub_success()
        if container_ids:
            rds.hdel('eru:agent:%s:containers:meta' % host.name, *container_ids)
        rds.delete(*flags.keys())
        current_flask.logger.info('Task<id=%s>: Done', task_id)

    if not version.containers.count():
        falcon_remove_alarms(version)
Exemple #18
0
def rm_containers():
    cids = request.get_json()["cids"]
    version_dict = {}
    ts, watch_keys = [], []
    for cid in cids:
        container = Container.get_by_container_id(cid)
        if not container:
            continue
        version_dict.setdefault((container.version, container.host), []).append(container)
    for (version, host), containers in version_dict.iteritems():
        cids = [c.id for c in containers]
        task_props = {"container_ids": cids}
        task = Task.create(consts.TASK_REMOVE, version, host, task_props)
        remove_containers.apply_async(args=(task.id, cids, False), task_id="task:%d" % task.id)
        ts.append(task.id)
        watch_keys.append(task.result_key)
    return {"r": 0, "msg": "ok", "tasks": ts, "watch_keys": watch_keys}
Exemple #19
0
def test_container_release_cores(test_db):
    a = App.get_or_create('app', 'http://git.hunantv.com/group/app.git')
    v = a.add_version(random_sha1())
    p = Pod.create('pod', 'pod', 10, -1)
    host = Host.create(p, random_ipv4(), random_string(), random_uuid(), 200,
                       0)

    for core in host.cores:
        assert core.host_id == host.id
        assert core.remain == 10

    containers = []

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10),
                              chunked(cores[100:], 10)):
        used_cores = {'full': fcores, 'part': pcores}
        host.occupy_cores(used_cores, 5)
        c = Container.create(random_sha1(),
                             host,
                             v,
                             random_string(),
                             'entrypoint',
                             used_cores,
                             'env',
                             nshare=5)
        containers.append(c)

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10),
                              chunked(cores[100:], 10)):
        for core in fcores:
            assert core.remain == 0
        for core in pcores:
            assert core.remain == 5

    for c in containers:
        c.delete()

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10),
                              chunked(cores[100:], 10)):
        for core in fcores:
            assert core.remain == 10
        for core in pcores:
            assert core.remain == 10
Exemple #20
0
def create_test_suite():
    appyaml = {
        'appname': 'app',
        'entrypoints': {
            'web': {
                'cmd': 'python app.py',
                'ports': ['5000/tcp'],
            },
            'daemon': {
                'cmd': 'python daemon.py',
            },
            'service': {
                'cmd': 'python service.py'
            },
        },
        'build': 'pip install -r ./requirements.txt',
    }
    app = App.get_or_create('app', 'http://git.hunantv.com/group/app.git')
    version = app.add_version(random_sha1())
    appconfig = version.appconfig
    appconfig.update(**appyaml)
    appconfig.save()

    pod = Pod.create('pod', 'pod')

    hosts = [
        Host.create(pod, random_ipv4(), random_string(prefix='host'),
                    random_uuid(), 4, 4096) for i in range(4)
    ]

    containers = []
    for (host, count), cores in centralized_schedule(pod, 4, 4, 0).iteritems():
        cores_per_container = len(cores) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {
                'full':
                cores['full'][i * cores_per_container:(i + 1) *
                              cores_per_container]
            }
            c = Container.create(cid, host, version, random_string(), 'web',
                                 used_cores, 'env')
            containers.append(c)
        host.occupy_cores(cores, 0)
    return app, version, pod, hosts, containers
Exemple #21
0
def migrate_container(container_id, need_to_remove=True):
    container = Container.get_by_container_id(container_id)
    if not container:
        _log.error('container %s is not found, ignore migration', container_id)
        return

    ncore, nshare = container.host.pod.get_core_allocation(container.ncore)
    host_cores = average_schedule(container.host.pod, 1, ncore, nshare, None)
    if not host_cores:
        _log.error('not enough cores to migrate')
        return

    cids = [container.id]
    spec_ips = cidrs = container.get_ips()
    (host, container_count), cores = next(host_cores.iteritems())

    props = {
        'ncontainer': 1,
        'entrypoint': container.entrypoint,
        'env': container.env,
        'full_cores': [c.label for c in cores.get('full', [])],
        'part_cores': [c.label for c in cores.get('part', [])],
        'ports': None,
        'args': None,
        'nshare': nshare,
        'networks': cidrs,
        'image': None,
        'route': '',
        'callback_url': container.callback_url,
        'container_ids': cids,
    }
    task = Task.create(consts.TASK_MIGRATE, container.version, host, props)
    if not task:
        _log.error('create migrate task error')
        return

    _log.info('start migration...')
    if need_to_remove:
        remove_containers.apply(args=(task.id, cids, False),
                                task_id='task:%s' % task.id)
    create_containers.apply(args=(task.id, 1, nshare, cores, cidrs, spec_ips),
                            task_id='task:%s' % task.id)
    _log.info('migration done')
Exemple #22
0
def create_test_suite():
    appyaml = {
        'appname': 'app',
        'entrypoints': {
            'web': {
                'cmd': 'python app.py',
                'ports': ['5000/tcp'],
            },
            'daemon': {
                'cmd': 'python daemon.py',
            },
            'service': {
                'cmd': 'python service.py'
            },
        },
        'build': 'pip install -r ./requirements.txt',
    }
    app = App.get_or_create('app', 'http://git.hunantv.com/group/app.git', 'token')
    version = app.add_version(random_sha1())
    appconfig = version.appconfig
    appconfig.update(**appyaml)
    appconfig.save()

    group = Group.create('group', 'group')
    pod = Pod.create('pod', 'pod')
    pod.assigned_to_group(group)

    hosts = [Host.create(pod, random_ipv4(), random_string(prefix='host'),
        random_uuid(), 4, 4096) for i in range(4)]

    for host in hosts:
        host.assigned_to_group(group)

    containers = []
    for (host, count), cores in group.get_free_cores(pod, 4, 4, 0).iteritems():
        cores_per_container = len(cores) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = cores['full'][i*cores_per_container:(i+1)*cores_per_container]
            c = Container.create(cid, host, version, random_string(), 'entrypoint', used_cores, 'env')
            containers.append(c)
        host.occupy_cores(cores, 0)
    return app, version, group, pod, hosts, containers
Exemple #23
0
def rm_containers():
    cids = request.get_json()['cids']
    version_dict = {}
    ts, watch_keys = [], []
    for cid in cids:
        container = Container.get_by_container_id(cid)
        if not container:
            continue
        version_dict.setdefault((container.version, container.host), []).append(container)
    for (version, host), containers in version_dict.iteritems():
        cids = [c.id for c in containers]
        task_props = {'container_ids': cids}
        task = Task.create(consts.TASK_REMOVE, version, host, task_props)
        remove_containers.apply_async(
            args=(task.id, cids, False),
            task_id='task:%d' % task.id
        )
        ts.append(task.id)
        watch_keys.append(task.result_key)
    return {'r': 0, 'msg': 'ok', 'tasks': ts, 'watch_keys': watch_keys}
Exemple #24
0
def migrate_container(container_id, need_to_remove=True):
    container = Container.get_by_container_id(container_id)
    if not container:
        _log.error('container %s is not found, ignore migration', container_id)
        return

    ncore, nshare= container.host.pod.get_core_allocation(container.ncore)
    host_cores = average_schedule(container.host.pod, 1, ncore, nshare, None)
    if not host_cores:
        _log.error('not enough cores to migrate')
        return

    cids = [container.id]
    spec_ips = cidrs = container.get_ips()
    (host, container_count), cores = next(host_cores.iteritems())

    props = {
        'ncontainer': 1,
        'entrypoint': container.entrypoint,
        'env': container.env,
        'full_cores': [c.label for c in cores.get('full', [])],
        'part_cores': [c.label for c in cores.get('part', [])],
        'ports': None,
        'args': None,
        'nshare': nshare,
        'networks': cidrs,
        'image': None,
        'route': '',
        'callback_url': container.callback_url,
        'container_ids': cids,
    }
    task = Task.create(consts.TASK_MIGRATE, container.version, host, props)
    if not task:
        _log.error('create migrate task error')
        return

    _log.info('start migration...')
    if need_to_remove:
        remove_containers.apply(args=(task.id, cids, False), task_id='task:%s' % task.id)
    create_containers.apply(args=(task.id, 1, nshare, cores, cidrs, spec_ips), task_id='task:%s' % task.id)
    _log.info('migration done')
Exemple #25
0
def remove_containers(task_id, cids, rmi):
    task = Task.get(task_id)
    notifier = TaskNotifier(task)
    containers = Container.get_multi(cids)
    container_ids = [c.container_id for c in containers]
    host = task.host
    try:
        flags = {'eru:agent:%s:container:flag' % cid: 1 for cid in container_ids}
        rds.mset(**flags)
        for c in containers:
            remove_container_backends(c)
        appnames = {c.appname for c in containers}
        publish_to_service_discovery(*appnames)

        dockerjob.remove_host_containers(containers, host)
        if rmi:
            dockerjob.remove_image(task.version, host)
    except Exception, e:
        logger.exception(e)
        task.finish_with_result(code.TASK_FAILED)
        notifier.pub_fail()
Exemple #26
0
def container_log(cid):
    stderr = request.args.get('stderr', type=int, default=0)
    stdout = request.args.get('stdout', type=int, default=0)
    tail = request.args.get('tail', type=int, default=10)

    # docker client's argument
    if tail == 0:
        tail = 'all'

    ws = request.environ['wsgi.websocket']
    container = Container.get_by_container_id(cid)
    if not container:
        ws.close()
        logger.info('Container %s not found, close websocket' % cid)
        return 'websocket closed'
    try:
        client = get_docker_client(container.host.addr)
        for line in client.logs(cid, stream=True, stderr=bool(stderr), stdout=bool(stdout), tail=tail):
            ws.send(line)
    except geventwebsocket.WebSocketError, e:
        logger.exception(e)
Exemple #27
0
def bind_network(cid):
    data = request.get_json()
    appname = data.get('appname')
    c = Container.get_by_container_id(cid)
    if not (c and c.is_alive):
        raise EruAbortException(consts.HTTP_NOT_FOUND, 'Container %s not found' % cid)
    if c.appname != appname:
        raise EruAbortException(consts.HTTP_NOT_FOUND, 'Container does not belong to app')

    network_names = data.get('networks', [])
    networks = filter(None, [Network.get_by_name(n) for n in network_names])
    if not networks:
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'network empty')

    ips = filter(None, [n.acquire_ip() for n in networks])
    if not ips:
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'no ip available')

    nid = max([ip.network_id for ip in c.ips.all()] + [-1]) + 1
    bind_container_ip(c, ips, nid=nid)
    for ip in ips:
        ip.assigned_to_container(c)
    return {'r': 0, 'msg': ips}
Exemple #28
0
def fix_core(host):
    containers = Container.get_multi_by_host(host)

    # 没有的话, 直接销毁重建
    if not containers:
        _create_cores_on_host(host, host.ncore)
        return

    data = {str(i): host.core_share for i in xrange(host.ncore)}

    for c in containers:
        cores = c.cores
        nshare = int(cores.get('nshare', '0'))
        for e in cores.get('full', []):
            e.remain = host.core_share
            data.pop(e.label)
        for s in cores.get('part', []):
            s.remain = host.core_share - nshare
            data[s.label] -= nshare
        c.cores = cores
    rds.delete(host._cores_key)
    if data:
        rds.zadd(host._cores_key, **data)
    print 'done', host
Exemple #29
0
def rm_containers():
    cids = request.get_json()['cids']

    if not all(len(cid) >= 7 for cid in cids):
        raise EruAbortException(consts.HTTP_BAD_REQUEST, 'must given at least 7 chars for container_id')

    version_dict = {}
    ts, watch_keys = [], []
    for cid in cids:
        container = Container.get_by_container_id(cid)
        if not container:
            continue
        version_dict.setdefault((container.version, container.host), []).append(container)
    for (version, host), containers in version_dict.iteritems():
        cids = [c.id for c in containers]
        task_props = {'container_ids': cids}
        task = Task.create(consts.TASK_REMOVE, version, host, task_props)
        remove_containers.apply_async(
            args=(task.id, cids, False),
            task_id='task:%d' % task.id
        )
        ts.append(task.id)
        watch_keys.append(task.result_key)
    return {'r': 0, 'msg': 'ok', 'tasks': ts, 'watch_keys': watch_keys}
Exemple #30
0
def poll_container(cid):
    c = Container.get_by_container_id(cid)
    if not c:
        raise EruAbortException(consts.HTTP_NOT_FOUND, 'Container %s not found' % cid)
    return {'r': 0, 'container': c.container_id, 'status': c.is_alive}
Exemple #31
0
def cure_container(cid):
    c = Container.get_by_container_id(cid)
    if c:
        c.cure()
    return {'r':0, 'msg': code.OK}
Exemple #32
0
def create_containers_with_macvlan(task_id, ncontainer, nshare, cores, network_ids, spec_ips=None):
    """
    执行task_id的任务. 部署ncontainer个容器, 占用*_core_ids这些核, 绑定到networks这些子网
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return

    if spec_ips is None:
        spec_ips = []

    need_network = bool(network_ids)
    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    ports = task.props['ports']
    args = task.props['args']
    # use raw
    route = task.props['route']
    image = task.props['image']
    callback_url = task.props['callback_url']
    cpu_shares = int(float(nshare) / host.pod.core_share * 1024) if nshare else 1024

    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    pub_agent_route_key = 'eru:agent:%s:route' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id

    cids = []

    for fcores, pcores in _iter_cores(cores, ncontainer):
        cores_for_one_container = {'full': fcores, 'part': pcores}
        try:
            cid, cname = dockerjob.create_one_container(host, version,
                entrypoint, env, fcores+pcores, ports=ports, args=args,
                cpu_shares=cpu_shares, image=image, need_network=need_network)
        except Exception as e:
            # 写给celery日志看
            print e
            host.release_cores(cores_for_one_container, nshare)
            continue

        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            if ERU_AGENT_API == 'pubsub':
                values = [str(task_id), cid] + ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
                rds.publish(pub_agent_vlan_key, '|'.join(values))
            elif ERU_AGENT_API == 'http':
                agent = get_agent(host)
                ip_list = [(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
                agent.add_container_vlan(cid, str(task_id), ip_list)

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

            if route:
                rds.publish(pub_agent_route_key, '%s|%s' % (cid, route))

        else:
            current_flask.logger.info('Creating container (cid=%s, ips=%s)', cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint,
                    cores_for_one_container, env, nshare, callback_url)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(c)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        current_flask.logger.info('Cleaning failed container (cid=%s)', cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores_for_one_container, nshare)
        [ip.release() for ip in ips]
        # 失败了就得清理掉这个key
        rds.delete(feedback_key)

    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()

    # 有IO, 丢最后面算了
    falcon_all_graphs(version)
    falcon_all_alarms(version)

    current_flask.logger.info('Task<id=%s>: Done', task_id)
Exemple #33
0
def get_container_by_id(id):
    c = Container.get(id)
    if not c:
        raise EruAbortException(consts.HTTP_NOT_FOUND, 'Container %s not found' % id)
    return c
Exemple #34
0
def remove_container(cid):
    c = Container.get_by_container_id(cid)
    if not c:
        return {'r': 1, 'msg': 'container %s not found' % cid}
    dockerjob.remove_container_by_cid([cid], c.host)
    return {'r': 0, 'msg': consts.OK}
Exemple #35
0
def create_containers_with_macvlan(task_id, ncontainer, core_ids, network_ids):
    """
    执行task_id的任务. 部署ncontainer个容器, 占用core_ids这些核, 绑定到networks这些子网
    """
    task = Task.get(task_id)
    if not task:
        return

    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    used_cores = Core.get_multi(core_ids)

    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id

    cids = []

    for cores in more_itertools.chunked(used_cores, len(core_ids)/ncontainer):
        try:
            cid, cname = dockerjob.create_one_container(host, version,
                    entrypoint, env, cores)
        except:
            host.release_cores(cores)
            continue

        ips = [n.acquire_ip() for n in networks]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            ident_id = cname.split('_')[-1]
            values = [str(task_id), cid, ident_id] + ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, '|'.join(values))

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

        else:
            logger.info('Creating container with cid %s and ips %s' % (cid, ips))
            c = Container.create(cid, host, version, cname, entrypoint, cores, env)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        logger.info('Cleaning failed container with cid %s' % cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores)
        [ip.release() for ip in ips]

    publish_to_service_discovery(version.name)
    task.finish_with_result(code.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
Exemple #36
0
def create_containers_with_macvlan_public(task_id, ncontainer, nshare, network_ids, spec_ips=None):
    """
    执行task_id的任务. 部署ncontainer个容器, 绑定到networks这些子网
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return

    if spec_ips is None:
        spec_ips = []

    networks = Network.get_multi(network_ids)

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    # use raw
    image = task.props['image']
    cpu_shares = 1024

    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id

    cids = []

    for _ in range(ncontainer):
        try:
            cid, cname = dockerjob.create_one_container(host, version,
                entrypoint, env, cores=None, cpu_shares=cpu_shares, image=image)
        except Exception as e:
            print e # 同上
            continue

        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}

        if ips:
            ident_id = cname.split('_')[-1]
            values = [str(task_id), cid, ident_id] + ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, '|'.join(values))

        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)

        else:
            current_flask.logger.info('Creating container (cid=%s, ips=%s)', cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint, {}, env, nshare)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # 略过清理工作
            continue

        # 清理掉失败的容器, 释放核, 释放ip
        current_flask.logger.info('Cleaning failed container (cid=%s)', cid)
        dockerjob.remove_container_by_cid([cid], host)
        [ip.release() for ip in ips]
        # 失败了就得清理掉这个key
        rds.delete(feedback_key)

    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
    current_flask.logger.info('Task<id=%s>: Done', task_id)
Exemple #37
0
def test_container(test_db):
    a = App.get_or_create('app', 'http://git.hunantv.com/group/app.git', '')
    assert a is not None
    assert a.id == a.user_id

    v = a.add_version(random_sha1())
    assert v is not None
    assert v.app.id == a.id
    assert v.name == a.name
    assert len(v.containers.all()) == 0
    assert len(v.tasks.all()) == 0

    g = Group.create('group', 'group')
    p = Pod.create('pod', 'pod', 10, -1)
    assert p.assigned_to_group(g)
    hosts = [Host.create(p, random_ipv4(), random_string(prefix='host'),
        random_uuid(), 4, 4096) for i in range(6)]

    for host in hosts[:3]:
        host.assigned_to_group(g)
    host_ids1 = {h.id for h in hosts[:3]}
    host_ids2 = {h.id for h in hosts[3:]}

    host_cores = g.get_free_cores(p, 3, 3, 0)

    #测试没有碎片核的情况
    #获取核
    containers = []
    for (host, count), cores in host_cores.iteritems():
        cores_per_container = len(cores['full']) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {'full': cores['full'][i*cores_per_container:(i+1)*cores_per_container]}
            # not using a port
            c = Container.create(cid, host, v, random_string(), 'entrypoint', used_cores, 'env')
            assert c is not None
            containers.append(c)
        host.occupy_cores(cores, 0)

    for host in g.private_hosts.all():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 1
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 1
        assert host.count == 1

    assert len(containers) == 3
    assert len(v.containers.all()) == 3

    for c in containers:
        assert c.host_id in host_ids1
        assert c.host_id not in host_ids2
        assert c.app.id == a.id
        assert c.version.id == v.id
        assert c.is_alive
        assert len(c.full_cores.all()) == 3
        assert len(c.part_cores.all()) == 0
        all_core_labels = sorted(['0', '1', '2', '3', ])
        used_full_core_labels = [core.label for core in c.full_cores.all()]
        used_part_core_labels = [core.label for core in c.part_cores.all()]
        free_core_labels = [core.label for core in c.host.get_free_cores()[0]]
        assert all_core_labels == sorted(used_full_core_labels + used_part_core_labels + free_core_labels)

    #释放核
    for c in containers:
        c.delete()

    assert len(v.containers.all()) == 0
    assert g.get_max_containers(p, 3, 0) == 3
    host_cores = g.get_free_cores(p, 3, 3, 0)
    assert len(host_cores) == 3

    for host in g.private_hosts.all():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 4
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 0
        assert host.count == 0

    #测试有碎片的情况
    #获取核
    host_cores = g.get_free_cores(p, 3, 3, 4)
    containers = []
    for (host, count), cores in host_cores.iteritems():
        cores_per_container = len(cores['full']) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {'full':  cores['full'][i*cores_per_container:(i+1)*cores_per_container],
                    'part': cores['part']}
            # not using a port
            c = Container.create(cid, host, v, random_string(), 'entrypoint', used_cores, 'env')
            assert c is not None
            containers.append(c)
        host.occupy_cores(cores, 4)

    for host in g.private_hosts.all():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 0
        assert len(part_cores) == 1
        assert part_cores[0].used == 4
        assert len(host.containers.all()) == 1
        assert host.count == 1

    assert len(containers) == 3
    assert len(v.containers.all()) == 3

    for c in containers:
        assert c.host_id in host_ids1
        assert c.host_id not in host_ids2
        assert c.app.id == a.id
        assert c.version.id == v.id
        assert c.is_alive
        assert len(c.full_cores.all()) == 3
        assert len(c.part_cores.all()) == 1
        all_core_labels = sorted(['0', '1', '2', '3', ])
        used_full_core_labels = [core.label for core in c.full_cores.all()]
        used_part_core_labels = [core.label for core in c.part_cores.all()]
        free_core_labels = [core.label for core in c.host.get_free_cores()[0]]
        assert all_core_labels == sorted(used_full_core_labels + used_part_core_labels + free_core_labels)


    #释放核
    for c in containers:
        c.delete(4)

    assert len(v.containers.all()) == 0
    assert g.get_max_containers(p, 3, 0) == 3
    host_cores = g.get_free_cores(p, 3, 3, 0)
    assert len(host_cores) == 3

    for host in g.private_hosts.all():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 4
        assert len(host.containers.all()) == 0
        assert host.count == 0

    #获取
    host_cores = g.get_free_cores(p, 6, 1, 5)
    containers = []
    for (host, count), cores in host_cores.iteritems():
        cores_per_container = len(cores['full']) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {'full':  cores['full'][i*cores_per_container:(i+1)*cores_per_container],
                    'part': cores['part'][i:i+1]}
            # not using a port
            c = Container.create(cid, host, v, random_string(), 'entrypoint', used_cores, 'env')
            assert c is not None
            containers.append(c)
            host.occupy_cores(used_cores, 5)

    for host in g.private_hosts.all():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 1
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 2
        assert host.count == 2

    assert len(containers) == 6
    assert len(v.containers.all()) == 6

    for c in containers:
        assert c.host_id in host_ids1
        assert c.host_id not in host_ids2
        assert c.app.id == a.id
        assert c.version.id == v.id
        assert c.is_alive
        assert len(c.full_cores.all()) == 1
        assert len(c.part_cores.all()) == 1

    ##释放核
    for c in containers:
        c.delete(5)

    assert len(v.containers.all()) == 0
    assert g.get_max_containers(p, 3, 0) == 3
    host_cores = g.get_free_cores(p, 3, 3, 0)
    assert len(host_cores) == 3

    for host in g.private_hosts.all():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 4
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 0
        assert host.count == 0
Exemple #38
0
def migrate_container_set(host):
    containers = Container.get_multi_by_host(host)
    for c in containers:
        add_container_for_agent(c)
Exemple #39
0
def test_container(test_db):
    a = App.get_or_create('app', 'http://git.hunantv.com/group/app.git')
    assert a is not None
    assert a.id == a.user_id

    v = a.add_version(random_sha1())
    assert v is not None
    assert v.app.id == a.id
    assert v.name == a.name
    assert len(v.containers.all()) == 0
    assert len(v.tasks.all()) == 0

    p = Pod.create('pod', 'pod', 10, -1)
    hosts = [
        Host.create(p, random_ipv4(), random_string(prefix='host'),
                    random_uuid(), 4, 4096) for i in range(6)
    ]

    for host in hosts[3:]:
        host.set_public()
    host_ids1 = {h.id for h in hosts[:3]}
    host_ids2 = {h.id for h in hosts[3:]}

    host_cores = centralized_schedule(p, 3, 3, 0)

    #测试没有碎片核的情况
    #获取核
    containers = []
    for (host, count), cores in host_cores.iteritems():
        host.occupy_cores(cores, 0)
        cores_per_container = len(cores['full']) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {
                'full':
                cores['full'][i * cores_per_container:(i + 1) *
                              cores_per_container]
            }
            c = Container.create(cid,
                                 host,
                                 v,
                                 random_string(),
                                 'entrypoint',
                                 used_cores,
                                 'env',
                                 nshare=0)
            assert c is not None
            containers.append(c)

    for host in p.get_private_hosts():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 1
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 1
        assert host.count == 1

    assert len(containers) == 3
    assert len(v.containers.all()) == 3

    for c in containers:
        assert c.host_id in host_ids1
        assert c.host_id not in host_ids2
        assert c.app.id == a.id
        assert c.version.id == v.id
        assert c.is_alive
        assert len(c.full_cores) == 3
        assert len(c.part_cores) == 0
        all_core_labels = sorted([
            '0',
            '1',
            '2',
            '3',
        ])
        used_full_core_labels = [core.label for core in c.full_cores]
        used_part_core_labels = [core.label for core in c.part_cores]
        free_core_labels = [core.label for core in c.host.get_free_cores()[0]]
        assert all_core_labels == sorted(used_full_core_labels +
                                         used_part_core_labels +
                                         free_core_labels)

    #释放核
    for c in containers:
        c.delete()

    assert len(v.containers.all()) == 0
    assert get_max_container_count(p, 3, 0) == 3
    host_cores = centralized_schedule(p, 3, 3, 0)
    assert len(host_cores) == 3

    for host in p.get_private_hosts():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 4
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 0
        assert host.count == 4

    #测试有碎片的情况
    #获取核
    host_cores = centralized_schedule(p, 3, 3, 4)
    containers = []
    for (host, count), cores in host_cores.iteritems():
        cores_per_container = len(cores['full']) / count
        host.occupy_cores(cores, 4)
        for i in range(count):
            cid = random_sha1()
            used_cores = {
                'full':
                cores['full'][i * cores_per_container:(i + 1) *
                              cores_per_container],
                'part':
                cores['part']
            }
            # not using a port
            c = Container.create(cid,
                                 host,
                                 v,
                                 random_string(),
                                 'entrypoint',
                                 used_cores,
                                 'env',
                                 nshare=4)
            assert c is not None
            containers.append(c)

    for host in p.get_private_hosts():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 0
        assert len(part_cores) == 1
        assert part_cores[0].remain == 6
        assert len(host.containers.all()) == 1
        assert host.count == D('0.6')

    assert len(containers) == 3
    assert len(v.containers.all()) == 3

    for c in containers:
        assert c.host_id in host_ids1
        assert c.host_id not in host_ids2
        assert c.app.id == a.id
        assert c.version.id == v.id
        assert c.is_alive
        assert len(c.full_cores) == 3
        assert len(c.part_cores) == 1
        all_core_labels = sorted([
            '0',
            '1',
            '2',
            '3',
        ])
        used_full_core_labels = [core.label for core in c.full_cores]
        used_part_core_labels = [core.label for core in c.part_cores]
        free_core_labels = [core.label for core in c.host.get_free_cores()[0]]
        assert all_core_labels == sorted(used_full_core_labels +
                                         used_part_core_labels +
                                         free_core_labels)

    #释放核
    for c in containers:
        c.delete()

    assert len(v.containers.all()) == 0
    assert get_max_container_count(p, 3, 0) == 3
    host_cores = centralized_schedule(p, 3, 3, 0)
    assert len(host_cores) == 3

    for host in p.get_private_hosts():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 4
        assert len(host.containers.all()) == 0
        assert host.count == 4

    #获取
    host_cores = centralized_schedule(p, 6, 1, 5)
    containers = []
    for (host, count), cores in host_cores.iteritems():
        cores_per_container = len(cores['full']) / count
        for i in range(count):
            cid = random_sha1()
            used_cores = {
                'full':
                cores['full'][i * cores_per_container:(i + 1) *
                              cores_per_container],
                'part':
                cores['part'][i:i + 1],
            }
            host.occupy_cores(used_cores, 5)
            # not using a port
            c = Container.create(cid,
                                 host,
                                 v,
                                 random_string(),
                                 'entrypoint',
                                 used_cores,
                                 'env',
                                 nshare=5)
            assert c is not None
            containers.append(c)

    for host in p.get_private_hosts():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 1
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 2
        assert host.count == D('1')

    assert len(containers) == 6
    assert len(v.containers.all()) == 6

    for c in containers:
        assert c.host_id in host_ids1
        assert c.host_id not in host_ids2
        assert c.app.id == a.id
        assert c.version.id == v.id
        assert c.is_alive
        assert len(c.full_cores) == 1
        assert len(c.part_cores) == 1

    ##释放核
    for c in containers:
        c.delete()

    assert len(v.containers.all()) == 0
    assert get_max_container_count(p, 3, 0) == 3
    host_cores = centralized_schedule(p, 3, 3, 0)
    assert len(host_cores) == 3

    for host in p.get_private_hosts():
        full_cores, part_cores = host.get_free_cores()
        assert len(full_cores) == 4
        assert len(part_cores) == 0
        assert len(host.containers.all()) == 0
        assert host.count == 4
Exemple #40
0
def stop_container(cid):
    c = Container.get_by_container_id(cid)
    if c:
        c.kill()
        dockerjob.stop_containers([c,], c.host)
    return {'r':0, 'msg': code.OK}
Exemple #41
0
def create_containers(task_id,
                      ncontainer,
                      nshare,
                      cores,
                      network_ids,
                      spec_ips=None):
    """
    执行task_id的任务. 部署ncontainer个容器, 占用*_core_ids这些核, 绑定到networks这些子网
    """
    _log.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        _log.error('Task (id=%s) not found, quit', task_id)
        return

    if spec_ips is None:
        spec_ips = []

    need_network = bool(network_ids)
    networks = [ipam.get_pool(n) for n in network_ids]

    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    ports = task.props['ports']
    args = task.props['args']
    # use raw
    image = task.props['image']
    callback_url = task.props['callback_url']
    cpu_shares = int(float(nshare) / host.pod.core_share *
                     1024) if nshare else 1024

    cids = []
    backends = []
    entry = version.appconfig.entrypoints[entrypoint]

    for fcores, pcores in _iter_cores(cores, ncontainer):
        cores_for_one_container = {'full': fcores, 'part': pcores}
        # 在宿主机上创建容器
        try:
            cid, cname = dockerjob.create_one_container(
                host,
                version,
                entrypoint,
                env,
                fcores + pcores,
                ports=ports,
                args=args,
                cpu_shares=cpu_shares,
                image=image,
                need_network=need_network)
        except Exception as e:
            # 写给celery日志看
            _log.exception(e)
            host.release_cores(cores_for_one_container, nshare)
            continue

        # 容器记录下来
        c = Container.create(cid, host, version, cname, entrypoint,
                             cores_for_one_container, env, nshare,
                             callback_url)

        # 为容器创建网络栈
        # 同时把各种信息都记录下来
        # 如果失败, 清除掉所有记录和宿主机上的容器
        # 循环下一次尝试
        cidrs = [n.netspace for n in networks]
        if not ipam.allocate_ips(cidrs, cid, spec_ips):
            _clean_failed_containers(cid)
            continue

        notifier.notify_agent(c)
        add_container_for_agent(host, c)
        add_container_backends(c)
        cids.append(cid)
        backends.extend(c.get_backends())

        c.callback_report(status='start')

    health_check = entry.get('health_check', '')
    if health_check and backends:
        urls = [b + health_check for b in backends]
        if not wait_health_check(urls):
            # TODO 这里要么回滚要么报警
            _log.info('Task<id=%s>: Done, but something went error', task_id)
            return

    publish_to_service_discovery(version.name)
    task.finish(consts.TASK_SUCCESS)
    task.reason = 'ok'
    task.container_ids = cids
    notifier.pub_success()

    _log.info('Task<id=%s>: Done', task_id)