def test_app_image():
    """Build the default test app image via eru-core and return its tag.

    The test is skipped when ``core_online`` is falsy. Every build message
    must be free of errors; the ``progress`` field of the last message is
    taken as the image tag and must contain ``<default_appname>:<default_sha>``.
    """
    if not core_online:
        pytest.skip(
            msg='one or more eru-core is offline, skip core-related tests')

    specs = make_specs()
    appname = default_appname

    # Convert each build stage from the specs into its protobuf counterpart.
    stage_builds = {}
    for stage_name, build in specs.builds.items():
        stage_builds[stage_name] = pb.Build(**build)

    build_options = pb.BuildImageOptions(
        name=appname,
        user=appname,
        uid=12345,
        tag=default_sha,
        builds=pb.Builds(stages=specs.stages, builds=stage_builds),
    )

    # Drain the whole message stream before checking any message.
    messages = list(get_core(BUILD_ZONE).build_image(build_options))

    image_tag = ''
    for message in messages:
        assert not message.error
        image_tag = message.progress

    expected_fragment = '{}:{}'.format(default_appname, default_sha)
    assert expected_fragment in image_tag
    return image_tag
def get_pod_nodes(name):
    """List nodes under a pod

    The pod is resolved through ``_get_pod`` first, then its nodes are
    fetched from the core client of the current zone (``g.zone``).

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        [
            {
                "name": "c1-eru-2.ricebook.link",
                "endpoint": "tcp://xxx.xxx.xxx.xxx:2376",
                "podname": "eru",
                "cpu": {"0": 75},
                "memory": 855085056,
                "info": "{\\"ID\\":\\"UUWL:QZS7:MPQY:KMYY:T5Q4:GCBY:JBRA:Q55K:NUKW:O2N2:4BEX:UTFK\\",\\"Containers\\":7,\\"ContainersRunning\\":6,\\"ContainersPaused\\":0,\\"ContainersStopped\\":1,\\"Images\\":9,\\"Driver\\":\\"overlay\\",\\"DriverStatus\\":[[\\"Backing Filesystem\\",\\"xfs\\"],[\\"Supports d_type\\",\\"false\\"]],\\"SystemStatus\\":null,\\"Plugins\\":{\\"Volume\\":[\\"local\\"],\\"Network\\":[\\"bridge\\",\\"host\\",\\"macvlan\\",\\"null\\",\\"overlay\\"],\\"Authorization\\":null},\\"MemoryLimit\\":true,\\"SwapLimit\\":true,\\"KernelMemory\\":true,\\"CpuCfsPeriod\\":true,\\"CpuCfsQuota\\":true,\\"CPUShares\\":true,\\"CPUSet\\":true,\\"IPv4Forwarding\\":true,\\"BridgeNfIptables\\":true,\\"BridgeNfIp6tables\\":true,\\"Debug\\":false,\\"NFd\\":57,\\"OomKillDisable\\":true,\\"NGoroutines\\":72,\\"SystemTime\\":\\"2018-03-20T16:10:51.806831123+08:00\\",\\"LoggingDriver\\":\\"json-file\\",\\"CgroupDriver\\":\\"cgroupfs\\",\\"NEventsListener\\":1,\\"KernelVersion\\":\\"3.10.0-693.5.2.el7.x86_64\\",\\"OperatingSystem\\":\\"CentOS Linux 7 (Core)\\",\\"OSType\\":\\"linux\\",\\"Architecture\\":\\"x86_64\\",\\"IndexServerAddress\\":\\"https://index.docker.io/v1/\\",\\"RegistryConfig\\":{\\"InsecureRegistryCIDRs\\":[\\"127.0.0.0/8\\"],\\"IndexConfigs\\":{\\"docker.io\\":{\\"Name\\":\\"docker.io\\",\\"Mirrors\\":[\\"https://registry.docker-cn.com/\\"],\\"Secure\\":true,\\"Official\\":true}},\\"Mirrors\\":[\\"https://registry.docker-cn.com/\\"]},\\"NCPU\\":1,\\"MemTotal\\":1928826880,\\"DockerRootDir\\":\\"/var/lib/docker\\",\\"HttpProxy\\":\\"\\",\\"HttpsProxy\\":\\"\\",\\"NoProxy\\":\\"\\",\\"Name\\":\\"c1-eru-2.ricebook.link\\",\\"Labels\\":[],\\"ExperimentalBuild\\":false,\\"ServerVersion\\":\\"17.12.1-ce\\",\\"ClusterStore\\":\\"etcd://127.0.0.1:2379\\",\\"ClusterAdvertise\\":\\"\\",\\"Runtimes\\":{\\"runc\\":{\\"path\\":\\"docker-runc\\"}},\\"DefaultRuntime\\":\\"runc\\",\\"Swarm\\":{\\"NodeID\\":\\"\\",\\"NodeAddr\\":\\"\\",\\"LocalNodeState\\":\\"inactive\\",\\"ControlAvailable\\":false,\\"Error\\":\\"\\",\\"RemoteManagers\\":null},\\"LiveRestoreEnabled\\":false,\\"Isolation\\":\\"\\",\\"InitBinary\\":\\"docker-init\\",\\"ContainerdCommit\\":{\\"ID\\":\\"9b55aab90508bd389d7654c4baf173a981477d55\\",\\"Expected\\":\\"9b55aab90508bd389d7654c4baf173a981477d55\\"},\\"RuncCommit\\":{\\"ID\\":\\"9f9c96235cc97674e935002fc3d78361b696a69e\\",\\"Expected\\":\\"9f9c96235cc97674e935002fc3d78361b696a69e\\"},\\"InitCommit\\":{\\"ID\\":\\"949e6fa\\",\\"Expected\\":\\"949e6fa\\"},\\"SecurityOptions\\":[\\"name=seccomp,profile=default\\"]}",
                "available": true,
                "labels": {},
                "__class__": "Node"
            }
        ]
    """
    # Resolve the pod before talking to core again, so an unknown pod is
    # handled by ``_get_pod`` first.
    target_pod = _get_pod(name)
    core = get_core(g.zone)
    return core.get_pod_nodes(target_pod.name)
def create_container(self, zone=None, user_id=None, appname=None, sha=None,
                     combo_name=None, debug=False, task_id=None):
    """Deploy containers for a release combo and record every result.

    The release is looked up by ``appname`` and ``sha``, its deploy options
    for ``combo_name`` are sent to the core client of ``zone``, and each
    message of the resulting stream is forwarded through
    ``self.stream_output``. A successful message produces a ``Container``
    record plus an ``OPLog`` entry; unsuccessful messages are collected and
    reported to the app subscribers in a single notification.

    Returns the list of deploy messages when ``self.request.id`` is falsy
    (synchronous call), otherwise ``None``.
    """
    release = Release.get_by_app_and_sha(appname, sha)
    app = release.app
    combo = app.get_combo(combo_name)
    deploy_opt = release.make_core_deploy_options(combo_name)
    message_stream = get_core(zone).create_container(deploy_opt)

    failures = []
    deploy_messages = []
    for message in message_stream:
        self.stream_output(message, task_id=task_id)
        deploy_messages.append(message)

        if not message.success:
            failures.append(message)
            continue

        logger.debug('Creating container %s:%s got grpc message %s',
                     appname, combo.entrypoint_name, message)
        if debug:
            override_status = ContainerOverrideStatus.DEBUG
        else:
            override_status = ContainerOverrideStatus.NONE
        Container.create(appname, sha, message.id, message.name, combo_name,
                         combo.entrypoint_name, combo.envname,
                         combo.cpu_quota, combo.memory, zone,
                         message.podname, message.nodename,
                         override_status=override_status)

        oplog_content = {
            'entrypoint': combo.entrypoint_name,
            'envname': combo.envname,
            'networks': combo.networks,
            'container_id': message.id,
        }
        OPLog.create(container_id=message.id,
                     user_id=user_id,
                     action=OPType.CREATE_CONTAINER,
                     appname=appname,
                     sha=sha,
                     zone=zone,
                     content=oplog_content)

    if failures:
        report = 'Deploy {}\n*BAD NEWS*:\n```\n{}\n```\n'.format(
            appname,
            json.dumps(failures, cls=VersatileEncoder),
        )
        notbot_sendmsg(app.subscribers, report)

    # A synchronous caller cannot receive the task output through redis
    # pubsub, so the messages are handed back directly. An asynchronous call
    # returns nothing because grpc messages cannot be pickled easily.
    if self.request.id:
        return None
    return deploy_messages
def backup(container_id, src_path):
    """Ask eru-core to back up ``src_path`` from a container.

    The container is looked up by ``container_id`` and the backup request is
    sent to the core client of the container's zone. A raised ``RpcError`` or
    a truthy ``error`` on the result is reported to the subscribers of the
    container's app. Nothing is returned.
    """
    failure_template = 'Backup container {} failed, err: {}'
    container = Container.get_by_container_id(container_id)

    try:
        core = get_core(container.zone)
        result = core.backup(container.container_id, src_path)
    except RpcError as rpc_error:
        notbot_sendmsg(container.app.subscribers,
                       failure_template.format(container_id, rpc_error))
        return None

    reported_error = result.error
    if not reported_error:
        return None

    notbot_sendmsg(container.app.subscribers,
                   failure_template.format(container_id, reported_error))
    return None
def remove_container(self, container_ids, user_id=None, task_id=None):
    """Remove one or more containers that all live in the same zone.

    ``container_ids`` may be a single id string or an iterable of ids. Ids
    with no matching ``Container`` record are ignored. The remaining
    containers are marked as removing, taken out of the ELB, and removed
    through the core client of their zone. Each message of the resulting
    stream is forwarded through ``self.stream_output``.

    Raises ``ActionError`` when the containers span more than one zone.

    Returns ``None`` when no container was found. Otherwise returns the list
    of removal messages when ``self.request.id`` is falsy (synchronous call),
    and ``None`` when it is truthy.
    """
    if isinstance(container_ids, str):
        container_ids = [container_ids]

    containers = []
    for requested_id in container_ids:
        record = Container.get_by_container_id(requested_id)
        if record:
            containers.append(record)
    if not containers:
        return None

    full_ids = [record.container_id for record in containers]
    zones = {record.zone for record in containers}
    if len(zones) != 1:
        raise ActionError('Cannot remove containers across zone')
    zone = zones.pop()

    for record in containers:
        record.mark_removing()

    update_elb_for_containers(containers, UpdateELBAction.REMOVE)
    message_stream = get_core(zone).remove_container(full_ids)

    # Messages containing one of these markers mean the container is already
    # gone, so the local record is deleted as well.
    already_gone_markers = ('Key not found', 'No such container')

    remove_container_messages = []
    for message in message_stream:
        self.stream_output(message, task_id=task_id)
        remove_container_messages.append(message)

        container = Container.get_by_container_id(message.id)
        if not container:
            logger.info('Container [%s] not found when deleting', message.id)
            continue

        if message.success:
            container.delete()
            # record the operation in the oplog
            oplog_content = {'container_id': message.id}
            OPLog.create(container_id=message.id,
                         user_id=user_id,
                         action=OPType.REMOVE_CONTAINER,
                         appname=container.appname,
                         sha=container.sha,
                         zone=container.zone,
                         content=oplog_content)
            continue

        if any(marker in message.message for marker in already_gone_markers):
            container.delete()
            continue

        logger.error('Remove container %s got error: %s',
                     message.id, message.message)
        notbot_sendmsg(
            '#platform',
            'Error removing container {}: {}\n@timfeirg'.format(
                message.id, message.message))

    # Only a synchronous call (no task request id) gets the messages back;
    # see the comment at the end of create_container for the reasoning.
    if self.request.id:
        return None
    return remove_container_messages
def build_image(self, appname, sha):
    """Build the image for a release and store the resulting image tag.

    For a raw release (``release.raw`` truthy) nothing is built: the image
    is set to ``specs.base`` and ``None`` is returned. Otherwise the release
    build options are sent to the core client of ``BUILD_ZONE``, every
    build message is forwarded through ``self.stream_output``, and the
    ``progress`` field of the last message is stored as the release image.

    Returns the image tag for a built release, ``None`` for a raw release.

    Raises ``RuntimeError`` when eru-core yields no build message at all.
    The original code read the loop variable after the loop, which failed
    with an opaque ``UnboundLocalError`` in that case.
    """
    release = Release.get_by_app_and_sha(appname, sha)
    specs = release.specs
    if release.raw:
        release.update_image(specs.base)
        return None

    core = get_core(BUILD_ZONE)
    opts = release.make_core_build_options()

    # Track the last message explicitly so an empty stream is detectable.
    last_message = None
    for message in core.build_image(opts):
        self.stream_output(message)
        last_message = message

    if last_message is None:
        raise RuntimeError(
            'eru-core returned no build messages for {}:{}'.format(
                appname, sha))

    image_tag = last_message.progress
    release.update_image(image_tag)
    return image_tag
def list_networks(name):
    """List networks under a pod

    The pod is resolved through ``_get_pod`` first, then its networks are
    fetched from the core client of the current zone (``g.zone``).

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        [
            {"name": "host", "subnets": [], "__class__": "Network"},
            {"name": "bridge", "subnets": ["172.17.0.0/16"], "__class__": "Network"}
        ]
    """
    target_pod = _get_pod(name)
    core = get_core(g.zone)
    return core.list_networks(target_pod.name)
def record_health_status(self):
    """Record citadel's health status in redis and report unavailable cores.

    Every zone in ``ZONE_CONFIG`` is probed with ``list_pods``. When the
    probe raises an ``RpcError`` whose code is ``StatusCode.UNAVAILABLE``,
    a "down" message is written to ``CITADEL_HEALTH_CHECK_STATS_KEY`` and
    sent to ``#platform``. ``RpcError`` with any other status code is
    ignored, as before.

    ``'OK'`` is written only when no zone was unavailable. Previously it was
    written unconditionally after the probes, so the failure message was
    always overwritten and the key could never report a down core.

    The key is written with a TTL of 30, so it expires on its own if this
    function stops being run.
    """
    any_core_down = False
    for zone in ZONE_CONFIG:
        core = get_core(zone)
        try:
            core.list_pods()
        except RpcError as e:
            if e.code() is StatusCode.UNAVAILABLE:
                any_core_down = True
                msg = 'eru-core ({}) is down, @eru will fix this ASAP'.format(zone)
                rds.setex(CITADEL_HEALTH_CHECK_STATS_KEY, msg, 30)
                notbot_sendmsg('#platform', msg)

    # Do not overwrite a failure status that was just recorded.
    if not any_core_down:
        rds.setex(CITADEL_HEALTH_CHECK_STATS_KEY, 'OK', 30)
def get_all_pods():
    """List all pods

    The pods are fetched from the core client of the current zone
    (``g.zone``).

    **Example response**:

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Type: application/json

        [
            {
                "name": "eru",
                "desc": "eru test pod",
                "__class__": "Pod"
            }
        ]
    """
    core = get_core(g.zone)
    return core.list_pods()
def test_workflow(request, test_app_image):
    """Exercise the core grpc client directly: create, verify, remove.

    No flask or celery code is involved. One container is created from the
    image passed in as ``test_app_image``, the single deploy message is
    checked, and a finalizer removes the container afterwards.
    """
    core = get_core(BUILD_ZONE)

    # now create container
    entrypoint_opt = pb.EntrypointOptions(
        name='web',
        command='python -m http.server',
        dir='/home/{}'.format(default_appname),
    )
    networks = {default_network_name: ''}
    deploy_opt = pb.DeployOptions(
        name=default_appname,
        entrypoint=entrypoint_opt,
        podname=default_podname,
        image=test_app_image,
        cpu_quota=default_cpu_quota,
        memory=default_memory,
        count=1,
        networks=networks,
    )
    deploy_messages = list(core.create_container(deploy_opt))
    container_id = deploy_messages[0].id

    def cleanup():
        removal = list(core.remove_container([container_id]))[0]
        assert removal.success

    # Register the cleanup before asserting, so the container is removed
    # even when one of the checks below fails.
    request.addfinalizer(cleanup)

    assert len(deploy_messages) == 1
    deploy_message = deploy_messages[0]
    assert not deploy_message.error
    assert deploy_message.memory == default_memory
    assert deploy_message.podname == default_podname

    published = deploy_message.publish
    assert len(published) == 1
    network_name, _ = published.popitem()
    assert network_name == default_network_name
def _get_pod(name):
    """Fetch pod ``name`` from the current zone's core, aborting with 404.

    ``abort(404, ...)`` is called when core returns a falsy pod; otherwise
    the pod is returned.
    """
    core = get_core(g.zone)
    pod = core.get_pod(name)
    if pod:
        return pod
    abort(404, 'pod `%s` not found' % name)
    return pod
def get_node(self):
    """Return the node ``self.nodename`` of pod ``self.podname``.

    The lookup goes through the core client of ``self.zone``.
    """
    core = get_core(self.zone)
    return core.get_node(self.podname, self.nodename)
def test_create_container(watch_etcd, request, test_app_image):
    """End-to-end check of ``create_container`` for the default combo.

    Creates one container, registers a finalizer that removes it, then
    checks the stored ``Container`` record, the deploy info read from etcd,
    the published web entrypoint, and the container details reported by
    core (binds, environment variables, command line).
    """
    release = Release.get_by_app_and_sha(default_appname, default_sha)
    release.update_image(test_app_image)
    combo = release.app.get_combo(default_combo_name)
    combo.update(extra_args=default_extra_args)

    create_messages = create_container(
        DEFAULT_ZONE,
        FAKE_USER['id'],
        default_appname,
        default_sha,
        default_combo_name,
    )
    create_message = create_messages[0]
    assert not create_message.error
    # test if hook is executed
    assert hook_proof in create_message.hook.decode('utf-8')
    container_id = create_message.id

    def cleanup():
        removal = remove_container(container_id)[0]
        assert removal.success
        # test if hook is executed
        assert hook_proof in removal.message

    request.addfinalizer(cleanup)

    container = Container.get_by_container_id(container_id)
    # The agent certainly has not probed the container yet, so deploy_info
    # should still hold its default value.
    assert container.deploy_info == {}
    assert container.combo_name == default_combo_name
    assert container.podname == default_podname
    assert container.memory == default_memory
    assert float(container.cpu_quota) == default_cpu_quota

    # check etcd data at /eru-core/deploy/test-app/web
    assert container.wait_for_erection()
    etcd = get_etcd(DEFAULT_ZONE)
    raw_deploy_info = etcd.read(container.core_deploy_key).value
    deploy_info = json.loads(raw_deploy_info)
    # check watch_etcd process is actually working
    assert container.deploy_info == deploy_info
    assert deploy_info['Healthy'] is True

    extend = deploy_info['Extend']
    assert extend['healthcheck_tcp'] == ''
    assert extend['healthcheck_http'] == str(default_publish[0])
    assert extend['healthcheck_url'] == '/{}'.format(artifact_filename)
    assert int(extend['healthcheck_code']) == 200

    publish = deploy_info['Publish']
    assert len(publish) == 1
    _, address = publish.popitem()
    ip = address.partition(':')[0]

    # test if web entrypoint is up
    artifact_url = 'http://{}:{}/{}'.format(
        ip, default_publish[0], artifact_filename)
    artifact_response = requests.get(artifact_url)
    assert artifact_content in artifact_response.text

    core = get_core(DEFAULT_ZONE)
    container_info = json.loads(core.get_container(container_id).info)
    binds = container_info['HostConfig']['Binds']
    assert '/tmp:/home/test-app/tmp:rw' in binds

    # check environment variables from combo is actually injected into the
    # container
    expected_env_vars = set(default_env.to_env_vars())
    actual_env_vars = set(container_info['Config']['Env'])
    assert expected_env_vars.issubset(actual_env_vars)

    # verify extra_args has been correctly appended
    expected_command = '{} {}'.format(
        default_entrypoints['web']['cmd'], default_extra_args)
    actual_command = ' '.join(container_info['Config']['Cmd'])
    assert expected_command == actual_command