Example #1
0
def test_app_image():
    """Build the default test app image through eru-core and return its tag.

    The test is skipped when ``core_online`` is falsy. Build options are
    assembled from ``make_specs()``; every build message must be free of
    errors, and the ``progress`` field of the last message must contain
    ``<default_appname>:<default_sha>``.

    Returns:
        The ``progress`` value of the final build message (the image tag).
    """
    if not core_online:
        # Pass the message positionally: the keyword was renamed from ``msg``
        # to ``reason`` in newer pytest releases, while the positional form
        # is accepted by both old and new versions.
        pytest.skip('one or more eru-core is offline, skip core-related tests')

    specs = make_specs()
    appname = default_appname
    builds_map = {
        stage_name: pb.Build(**build)
        for stage_name, build in specs.builds.items()
    }
    core_builds = pb.Builds(stages=specs.stages, builds=builds_map)
    opts = pb.BuildImageOptions(name=appname,
                                user=appname,
                                uid=12345,
                                tag=default_sha,
                                builds=core_builds)
    core = get_core(BUILD_ZONE)
    build_image_messages = list(core.build_image(opts))
    # Without this guard an empty stream would surface as a NameError on the
    # loop variable instead of a readable test failure.
    assert build_image_messages, 'eru-core returned no build messages'
    for m in build_image_messages:
        assert not m.error

    # The last message of the stream carries the resulting image tag.
    image_tag = build_image_messages[-1].progress
    assert '{}:{}'.format(default_appname, default_sha) in image_tag
    return image_tag
Example #2
0
def get_pod_nodes(name):
    """List nodes under a pod

    The pod is resolved through ``_get_pod`` first; the node list is then
    requested from the core of ``g.zone`` using the resolved pod's name.

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        [
            {
                "name": "c1-eru-2.ricebook.link",
                "endpoint": "tcp://xxx.xxx.xxx.xxx:2376",
                "podname": "eru",
                "cpu": {"0": 75},
                "memory": 855085056,
                "info": "{\\"ID\\":\\"UUWL:QZS7:MPQY:KMYY:T5Q4:GCBY:JBRA:Q55K:NUKW:O2N2:4BEX:UTFK\\",\\"Containers\\":7,\\"ContainersRunning\\":6,\\"ContainersPaused\\":0,\\"ContainersStopped\\":1,\\"Images\\":9,\\"Driver\\":\\"overlay\\",\\"DriverStatus\\":[[\\"Backing Filesystem\\",\\"xfs\\"],[\\"Supports d_type\\",\\"false\\"]],\\"SystemStatus\\":null,\\"Plugins\\":{\\"Volume\\":[\\"local\\"],\\"Network\\":[\\"bridge\\",\\"host\\",\\"macvlan\\",\\"null\\",\\"overlay\\"],\\"Authorization\\":null},\\"MemoryLimit\\":true,\\"SwapLimit\\":true,\\"KernelMemory\\":true,\\"CpuCfsPeriod\\":true,\\"CpuCfsQuota\\":true,\\"CPUShares\\":true,\\"CPUSet\\":true,\\"IPv4Forwarding\\":true,\\"BridgeNfIptables\\":true,\\"BridgeNfIp6tables\\":true,\\"Debug\\":false,\\"NFd\\":57,\\"OomKillDisable\\":true,\\"NGoroutines\\":72,\\"SystemTime\\":\\"2018-03-20T16:10:51.806831123+08:00\\",\\"LoggingDriver\\":\\"json-file\\",\\"CgroupDriver\\":\\"cgroupfs\\",\\"NEventsListener\\":1,\\"KernelVersion\\":\\"3.10.0-693.5.2.el7.x86_64\\",\\"OperatingSystem\\":\\"CentOS Linux 7 (Core)\\",\\"OSType\\":\\"linux\\",\\"Architecture\\":\\"x86_64\\",\\"IndexServerAddress\\":\\"https://index.docker.io/v1/\\",\\"RegistryConfig\\":{\\"InsecureRegistryCIDRs\\":[\\"127.0.0.0/8\\"],\\"IndexConfigs\\":{\\"docker.io\\":{\\"Name\\":\\"docker.io\\",\\"Mirrors\\":[\\"https://registry.docker-cn.com/\\"],\\"Secure\\":true,\\"Official\\":true}},\\"Mirrors\\":[\\"https://registry.docker-cn.com/\\"]},\\"NCPU\\":1,\\"MemTotal\\":1928826880,\\"DockerRootDir\\":\\"/var/lib/docker\\",\\"HttpProxy\\":\\"\\",\\"HttpsProxy\\":\\"\\",\\"NoProxy\\":\\"\\",\\"Name\\":\\"c1-eru-2.ricebook.link\\",\\"Labels\\":[],\\"ExperimentalBuild\\":false,\\"ServerVersion\\":\\"17.12.1-ce\\",\\"ClusterStore\\":\\"etcd://127.0.0.1:2379\\",\\"ClusterAdvertise\\":\\"\\",\\"Runtimes\\":{\\"runc\\":{\\"path\\":\\"docker-runc\\"}},\\"DefaultRuntime\\":\\"runc\\",\\"Swarm\\":{\\"NodeID\\":\\"\\",\\"NodeAddr\\":\\"\\",\\"LocalNodeState\\":\\"inactive\\",\\"ControlAvailable\\":false,\\"Error\\":\\"\\",\\"RemoteManagers\\":null},\\"LiveRestoreEnabled\\":false,\\"Isolation\\":\\"\\",\\"InitBinary\\":\\"docker-init\\",\\"ContainerdCommit\\":{\\"ID\\":\\"9b55aab90508bd389d7654c4baf173a981477d55\\",\\"Expected\\":\\"9b55aab90508bd389d7654c4baf173a981477d55\\"},\\"RuncCommit\\":{\\"ID\\":\\"9f9c96235cc97674e935002fc3d78361b696a69e\\",\\"Expected\\":\\"9f9c96235cc97674e935002fc3d78361b696a69e\\"},\\"InitCommit\\":{\\"ID\\":\\"949e6fa\\",\\"Expected\\":\\"949e6fa\\"},\\"SecurityOptions\\":[\\"name=seccomp,profile=default\\"]}",
                "available": true,
                "labels": {},
                "__class__": "Node"
            }
        ]
    """
    pod_name = _get_pod(name).name
    core = get_core(g.zone)
    return core.get_pod_nodes(pod_name)
Example #3
0
def create_container(self, zone=None, user_id=None, appname=None, sha=None,
                     combo_name=None, debug=False, task_id=None):
    """Deploy containers for a release combo and record every one created.

    The release is looked up by ``appname`` and ``sha``; the core of ``zone``
    is asked to create containers using the deploy options the release builds
    for ``combo_name``. Each message yielded by the core is forwarded through
    ``self.stream_output``. A successful message results in a ``Container``
    record plus a ``CREATE_CONTAINER`` oplog entry; unsuccessful messages are
    collected and reported to the app's subscribers in one notification.

    Returns:
        The list of deploy messages when ``self.request.id`` is falsy,
        otherwise ``None``.
    """
    release = Release.get_by_app_and_sha(appname, sha)
    app = release.app
    combo = app.get_combo(combo_name)
    deploy_opt = release.make_core_deploy_options(combo_name)
    message_stream = get_core(zone).create_container(deploy_opt)

    failures = []
    deploy_messages = []
    for message in message_stream:
        self.stream_output(message, task_id=task_id)
        deploy_messages.append(message)

        if not message.success:
            failures.append(message)
            continue

        logger.debug('Creating container %s:%s got grpc message %s', appname, combo.entrypoint_name, message)
        if debug:
            override_status = ContainerOverrideStatus.DEBUG
        else:
            override_status = ContainerOverrideStatus.NONE
        Container.create(
            appname,
            sha,
            message.id,
            message.name,
            combo_name,
            combo.entrypoint_name,
            combo.envname,
            combo.cpu_quota,
            combo.memory,
            zone,
            message.podname,
            message.nodename,
            override_status=override_status,
        )

        oplog_content = {
            'entrypoint': combo.entrypoint_name,
            'envname': combo.envname,
            'networks': combo.networks,
            'container_id': message.id,
        }
        OPLog.create(
            container_id=message.id,
            user_id=user_id,
            action=OPType.CREATE_CONTAINER,
            appname=appname,
            sha=sha,
            zone=zone,
            content=oplog_content,
        )

    if failures:
        report = 'Deploy {}\n*BAD NEWS*:\n```\n{}\n```\n'.format(
            appname,
            json.dumps(failures, cls=VersatileEncoder),
        )
        notbot_sendmsg(app.subscribers, report)

    # A synchronous caller cannot receive the task output through redis
    # pubsub, so the messages are handed back as the return value. For an
    # asynchronous call nothing is returned, because grpc messages cannot be
    # pickled easily.
    if not self.request.id:
        return deploy_messages
Example #4
0
def backup(container_id, src_path):
    """Ask the core to back up ``src_path`` from the given container.

    The container is looked up by ``container_id`` and the backup is requested
    from the core of the container's zone. The app's subscribers are notified
    when the call raises ``RpcError`` or when the returned result carries a
    truthy ``error``. Nothing is returned.
    """
    container = Container.get_by_container_id(container_id)

    def notify_failure(reason):
        # Same message for both failure paths; only the reason differs.
        notbot_sendmsg(container.app.subscribers, 'Backup container {} failed, err: {}'.format(container_id, reason))

    try:
        result = get_core(container.zone).backup(container.container_id, src_path)
    except RpcError as rpc_error:
        notify_failure(rpc_error)
        return

    backup_error = result.error
    if backup_error:
        notify_failure(backup_error)
Example #5
0
def remove_container(self, container_ids, user_id=None, task_id=None):
    """Remove one or more containers that all live in the same zone.

    ``container_ids`` may be a single id string or an iterable of ids. Ids
    with no matching ``Container`` record are ignored; if none match, the
    function returns ``None`` right away. The remaining containers are marked
    as removing, taken out of the ELB, and removed through the core of their
    zone. Each message yielded by the core is forwarded through
    ``self.stream_output``.

    Raises:
        ActionError: if the containers span more than one zone.

    Returns:
        The list of removal messages when ``self.request.id`` is falsy,
        otherwise ``None``.
    """
    if isinstance(container_ids, str):
        container_ids = [container_ids]

    lookups = (Container.get_by_container_id(cid) for cid in container_ids)
    containers = [found for found in lookups if found]
    if not containers:
        return

    full_ids = [c.container_id for c in containers]
    zones = {c.zone for c in containers}
    if len(zones) != 1:
        raise ActionError('Cannot remove containers across zone')
    zone = zones.pop()

    for c in containers:
        c.mark_removing()

    update_elb_for_containers(containers, UpdateELBAction.REMOVE)

    message_stream = get_core(zone).remove_container(full_ids)
    remove_container_messages = []
    for message in message_stream:
        self.stream_output(message, task_id=task_id)
        remove_container_messages.append(message)

        container = Container.get_by_container_id(message.id)
        if not container:
            logger.info('Container [%s] not found when deleting', message.id)
            continue

        if message.success:
            container.delete()
            # record the oplog entry
            oplog_content = {'container_id': message.id}
            OPLog.create(
                container_id=message.id,
                user_id=user_id,
                action=OPType.REMOVE_CONTAINER,
                appname=container.appname,
                sha=container.sha,
                zone=container.zone,
                content=oplog_content,
            )
            continue

        # These two error texts are handled by deleting the local record
        # without writing an oplog entry.
        if 'Key not found' in message.message or 'No such container' in message.message:
            container.delete()
            continue

        logger.error('Remove container %s got error: %s', message.id, message.message)
        notbot_sendmsg('#platform', 'Error removing container {}: {}\n@timfeirg'.format(message.id, message.message))

    # Messages are only returned for a synchronous call (no request id); the
    # reasoning is the same as at the end of create_container.
    if not self.request.id:
        return remove_container_messages
Example #6
0
def build_image(self, appname, sha):
    """Build the image for a release and store the resulting tag on it.

    For a release whose ``raw`` flag is truthy no build happens: the release
    image is set to ``specs.base`` and ``None`` is returned. Otherwise the
    core of ``BUILD_ZONE`` builds the image, every build message is forwarded
    through ``self.stream_output``, and the ``progress`` field of the final
    message is stored as the release image.

    Returns:
        The image tag taken from the last build message, or ``None`` for a
        raw release.

    Raises:
        RuntimeError: if the core yields no build messages at all.
    """
    release = Release.get_by_app_and_sha(appname, sha)
    specs = release.specs
    if release.raw:
        release.update_image(specs.base)
        return

    core = get_core(BUILD_ZONE)
    opts = release.make_core_build_options()
    last_message = None
    for last_message in core.build_image(opts):
        self.stream_output(last_message)

    # An empty stream used to fail with an opaque UnboundLocalError on the
    # loop variable; report the actual problem instead.
    if last_message is None:
        raise RuntimeError(
            'Building image for {}:{} produced no messages from eru-core'.format(appname, sha))

    # NOTE(review): the final message's ``error`` field is not inspected here;
    # confirm whether a failed build should be prevented from updating the
    # release image.
    image_tag = last_message.progress
    release.update_image(image_tag)
    return image_tag
Example #7
0
def list_networks(name):
    """List networks under a pod

    The pod is resolved through ``_get_pod`` first; the network list is then
    requested from the core of ``g.zone`` using the resolved pod's name.

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        [
            {"name": "host", "subnets": [], "__class__": "Network"},
            {"name": "bridge", "subnets": ["172.17.0.0/16"], "__class__": "Network"}
        ]
    """
    pod_name = _get_pod(name).name
    core = get_core(g.zone)
    return core.list_networks(pod_name)
Example #8
0
def record_health_status(self):
    """Health check for citadel itself.

    - if citadel web is down, sa will know
    - if citadel worker is down, the health stats written here are no longer
      refreshed and expire from redis, and then sa will know
    - if eru-core is down, the stats are set to a "down" message and the same
      message is sent to ``#platform``

    Every zone in ``ZONE_CONFIG`` is probed with ``list_pods()``. The stats
    key is set to ``'OK'`` only when no zone reported ``UNAVAILABLE``, so a
    failure recorded during the loop is not overwritten.
    """
    # NOTE(review): 30 is passed as the last ``setex`` argument, presumably
    # the TTL in seconds under the legacy redis-py argument order — confirm
    # against the client class behind ``rds``.
    all_cores_up = True
    for zone in ZONE_CONFIG:
        core = get_core(zone)
        try:
            core.list_pods()
        except RpcError as e:
            # Only UNAVAILABLE is treated as "core is down"; other RPC errors
            # are ignored here, as before.
            if e.code() is StatusCode.UNAVAILABLE:
                all_cores_up = False
                msg = 'eru-core ({}) is down, @eru will fix this ASAP'.format(zone)
                rds.setex(CITADEL_HEALTH_CHECK_STATS_KEY, msg, 30)
                notbot_sendmsg('#platform', msg)

    if all_cores_up:
        rds.setex(CITADEL_HEALTH_CHECK_STATS_KEY, 'OK', 30)
Example #9
0
def get_all_pods():
    """List all pods

    The list is requested from the core of ``g.zone``.

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        [
            {
                "name": "eru",
                "desc": "eru test pod",
                "__class__": "Pod"
            }
        ]
    """
    core = get_core(g.zone)
    return core.list_pods()
Example #10
0
def test_workflow(request, test_app_image):
    '''
    test core grpc here, no flask and celery stuff involved
    build, create, remove, and check if everything works

    The image comes from the ``test_app_image`` fixture. One container is
    created through the core of ``BUILD_ZONE``; a finalizer removes it again
    and asserts that the removal succeeded.
    '''
    core = get_core(BUILD_ZONE)
    # now create container
    entrypoint_opt = pb.EntrypointOptions(
        name='web',
        command='python -m http.server',
        dir='/home/{}'.format(default_appname))
    networks = {default_network_name: ''}
    deploy_opt = pb.DeployOptions(name=default_appname,
                                  entrypoint=entrypoint_opt,
                                  podname=default_podname,
                                  image=test_app_image,
                                  cpu_quota=default_cpu_quota,
                                  memory=default_memory,
                                  count=1,
                                  networks=networks)
    deploy_messages = list(core.create_container(deploy_opt))

    # Fail with a readable assertion rather than an IndexError when the core
    # returns nothing.
    assert deploy_messages, 'eru-core returned no deploy messages'
    deploy_message = deploy_messages[0]
    container_id = deploy_message.id

    def cleanup():
        remove_container_messages = list(core.remove_container([container_id]))
        assert remove_container_messages, 'eru-core returned no remove messages'
        assert remove_container_messages[0].success

    # Register the cleanup before the remaining assertions so the container
    # is removed even when one of them fails.
    request.addfinalizer(cleanup)

    assert len(deploy_messages) == 1
    assert not deploy_message.error
    assert deploy_message.memory == default_memory
    assert deploy_message.podname == default_podname
    network = deploy_message.publish
    assert len(network) == 1
    network_name, _ = network.popitem()
    assert network_name == default_network_name
Example #11
0
def _get_pod(name):
    """Fetch a pod by name from the core of ``g.zone``.

    Calls ``abort(404, ...)`` when the core returns a falsy pod; otherwise
    the pod is returned.
    """
    core = get_core(g.zone)
    found = core.get_pod(name)
    if not found:
        abort(404, 'pod `%s` not found' % name)

    return found
Example #12
0
 def get_node(self):
     """Fetch this object's node from the core of its zone.

     The core is selected with ``self.zone``; ``self.podname`` and
     ``self.nodename`` are passed to its ``get_node`` call.
     """
     core = get_core(self.zone)
     return core.get_node(self.podname, self.nodename)
Example #13
0
def test_create_container(watch_etcd, request, test_app_image):
    """End-to-end check of ``create_container`` for the default app and combo.

    The release image is set to the ``test_app_image`` fixture value and the
    combo is updated with ``default_extra_args`` before a container is
    created. The test then checks, in order: the create message and its hook
    output, the stored ``Container`` record, the deploy info read from etcd,
    the HTTP artifact served by the container, and the container's binds,
    environment variables and command as reported by the core. A finalizer
    removes the container and checks the removal message.
    """
    release = Release.get_by_app_and_sha(default_appname, default_sha)
    release.update_image(test_app_image)
    combo = release.app.get_combo(default_combo_name)
    combo.update(extra_args=default_extra_args)

    # create_container is called directly here and its result is indexed, so
    # it is expected to return the list of deploy messages.
    create_container_message = create_container(
        DEFAULT_ZONE,
        FAKE_USER['id'],
        default_appname,
        default_sha,
        default_combo_name,
    )[0]
    assert not create_container_message.error

    # test if hook is executed
    assert hook_proof in create_container_message.hook.decode('utf-8')

    container_id = create_container_message.id

    def cleanup():
        remove_message = remove_container(container_id)[0]
        assert remove_message.success
        # test if hook is executed
        assert hook_proof in remove_message.message

    request.addfinalizer(cleanup)

    container = Container.get_by_container_id(container_id)
    # the agent certainly has not probed the container yet, so deploy_info
    # should still be the default value
    assert container.deploy_info == {}
    assert container.combo_name == default_combo_name
    assert container.podname == default_podname
    assert container.memory == default_memory
    assert float(container.cpu_quota) == default_cpu_quota

    # check etcd data at /eru-core/deploy/test-app/web
    assert container.wait_for_erection()
    etcd = get_etcd(DEFAULT_ZONE)
    deploy_info = json.loads(etcd.read(container.core_deploy_key).value)

    # check watch_etcd process is actually working
    assert container.deploy_info == deploy_info

    assert deploy_info['Healthy'] is True
    assert deploy_info['Extend']['healthcheck_tcp'] == ''
    assert deploy_info['Extend']['healthcheck_http'] == str(default_publish[0])
    assert deploy_info['Extend']['healthcheck_url'] == '/{}'.format(artifact_filename)
    assert int(deploy_info['Extend']['healthcheck_code']) == 200
    publish = deploy_info['Publish']
    assert len(publish) == 1
    network_name, address = publish.popitem()
    # keep only the part before the first ':' of the published address
    ip = address.split(':', 1)[0]

    # test if web entrypoint is up
    artifact_url = 'http://{}:{}/{}'.format(ip, default_publish[0], artifact_filename)
    artifact_response = requests.get(artifact_url)
    assert artifact_content in artifact_response.text

    core = get_core(DEFAULT_ZONE)
    container_info = json.loads(core.get_container(container_id).info)
    assert '/tmp:/home/test-app/tmp:rw' in container_info['HostConfig']['Binds']

    # check environment variables from combo is actually injected into the
    # container
    left_env_vars = set(default_env.to_env_vars())
    right_env_vars = set(container_info['Config']['Env'])
    # equivalent to checking that left_env_vars is a subset of right_env_vars
    assert left_env_vars.intersection(right_env_vars) == left_env_vars

    # verify extra_args has been correctly appended
    left_command = '{} {}'.format(default_entrypoints['web']['cmd'], default_extra_args)
    right_command = ' '.join(container_info['Config']['Cmd'])
    assert left_command == right_command