def store_and_broadcast(self, iterable):
    """Drain the generator, pushing and publishing every line.

    Each line is appended to the redis log list and broadcast on the
    pub/sub channel. Returns the last line seen ('' if `iterable`
    yields nothing).
    """
    last_line = ''
    for last_line in iterable:
        rds.rpush(self.log_key, last_line)
        rds.publish(self.publish_key, last_line)
    return last_line
def notify_agent(self, container):
    """Tell the eru-agent about a newly created container.

    Dispatches on ERU_AGENT_API: 'pubsub' publishes a
    '+|<container_id>|<meta-json>' message on the host's watcher
    channel; 'http' calls the agent's HTTP API directly. A falsy
    `container` is ignored.
    """
    if not container:
        return
    if ERU_AGENT_API == 'http':
        get_agent(container.host).add_container(container)
    elif ERU_AGENT_API == 'pubsub':
        channel = ERU_AGENT_WATCHERKEY % self.task.host.name
        payload = '+|%s|%s' % (container.container_id, json.dumps(container.meta))
        rds.publish(channel, payload)
def _bind_container_ip():
    # NOTE(review): this function takes no arguments yet reads
    # `pub_agent_vlan_key`, `values`, `ips` and `feedback_key` as free
    # names -- presumably module-level globals, or this is an incomplete
    # copy of `_bind_container_ip_pubsub`; confirm before relying on it.
    rds.publish(pub_agent_vlan_key, '|'.join(values))
    for _ in ips:
        # Wait up to 15s for one feedback message per IP.
        rv = rds.blpop(feedback_key, 15)
        if rv is None:
            break
        # Feedback payload is '|'-separated; leading field '0' means failure.
        succ = rv[1].split('|')[0]
        if succ == '0':
            break
    else:
        # for/else: every IP acknowledged successfully.
        return True
    # Failure path: discard any leftover feedback entries.
    rds.delete(feedback_key)
    return False
def delete(self):
    """Delete this record, remembering to hand back every resource it
    occupies: bound IPs, CPU cores, and the host's free-core counter."""
    from .host import Host
    # Return every bound IP to its pool.
    for bound_ip in self.ips:
        bound_ip.release()
    # Free the cores and credit them back to the host counter.
    owner = self.host
    core_spec = self.cores
    owner.release_cores(core_spec, core_spec.get('nshare', 0))
    del self.cores
    full_part = D(len(core_spec.get('full', [])))
    shared_part = D(format(D(core_spec.get('nshare', 0)) / D(owner.core_share), '.3f'))
    owner.count = Host.count + full_part + shared_part
    db.session.add(owner)
    # Drop the container row itself and announce the deletion.
    db.session.delete(self)
    db.session.commit()
    rds.publish(_CONTAINER_PUB_KEY % self.appname,
                json.dumps({'container': self.container_id, 'status': 'delete'}))
def _bind_container_ip_pubsub(task_id, container, ips, nid=None):
    """Ask the host's agent (pubsub API) to bind `ips` inside `container`.

    Publishes one vlan request on the host channel, then waits for one
    feedback message per IP (15s timeout each). Returns True only when
    every IP is acknowledged; on any timeout or failure the feedback key
    is cleared and False is returned.
    """
    vlan_channel = 'eru:agent:%s:vlan' % container.host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id
    parts = [task_id, container.container_id]
    parts.extend('{0}:{1}'.format(nid or ip.vlan_seq_id, ip.vlan_address)
                 for ip in ips)
    rds.publish(vlan_channel, '|'.join(parts))
    all_ok = True
    for _ in ips:
        reply = rds.blpop(feedback_key, 15)
        # A missing reply or a leading '0' field both mean failure.
        if reply is None or reply[1].split('|')[0] == '0':
            all_ok = False
            break
    if all_ok:
        return True
    rds.delete(feedback_key)
    return False
def create(cls, container_id, host, version, name, entrypoint, cores, env, nshare=0):
    """Create a container record.

    `cores` is {'full': [core, ...], 'part': [core, ...]}. The host's
    free-core counter is decremented by the cores used, the binding is
    stored on the container, and a 'create' event is published on the
    app channel. Returns the new container, or None on a duplicate row.
    """
    from .host import Host
    try:
        container = cls(container_id, host, version, name, entrypoint, env)
        db.session.add(container)
        full_used = D(len(cores.get('full', [])))
        shared_used = D(format(D(nshare) / D(host.core_share), '.3f'))
        host.count = Host.count - full_used - shared_used
        db.session.add(host)
        db.session.commit()
        cores['nshare'] = nshare
        container.cores = cores
        appname = name.split('_')[0]
        rds.publish(_CONTAINER_PUB_KEY % appname,
                    json.dumps({'container': container_id, 'status': 'create'}))
        return container
    except sqlalchemy.exc.IntegrityError:
        db.session.rollback()
        return None
def delete(self):
    """Delete this record, remembering to release every resource it
    holds: IPs, cores and the host's free-core count."""
    from .host import Host
    # Release every IP bound to this container.
    [ip.release() for ip in self.ips]
    host = self.host
    freed = self.cores
    # Give the cores back, then bump the host counter by the freed amount.
    host.release_cores(freed, freed.get('nshare', 0))
    del self.cores
    host.count = (Host.count
                  + D(len(freed.get('full', [])))
                  + D(format(D(freed.get('nshare', 0)) / D(host.core_share), '.3f')))
    db.session.add(host)
    # Remove the container row and broadcast the deletion.
    db.session.delete(self)
    db.session.commit()
    rds.publish(_CONTAINER_PUB_KEY % self.appname,
                json.dumps({'container': self.container_id, 'status': 'delete'}))
def create(cls, container_id, host, version, name, entrypoint, cores, env, nshare=0, callback_url=''):
    """Create a container record.

    `cores` is {'full': [core, ...], 'part': [core, ...]}. Decrements
    the host's free-core counter, stores the core binding plus the
    callback URL, and publishes a 'create' event on the app channel.
    Returns None when the insert collides with an existing row.
    """
    from .host import Host
    try:
        container = cls(container_id, host, version, name, entrypoint, env)
        db.session.add(container)
        host.count = (Host.count
                      - D(len(cores.get('full', [])))
                      - D(format(D(nshare) / D(host.core_share), '.3f')))
        db.session.add(host)
        db.session.commit()
        cores['nshare'] = nshare
        container.cores = cores
        container.set_props(callback_url=callback_url)
        rds.publish(_CONTAINER_PUB_KEY % name.split('_')[0],
                    json.dumps({'container': container_id, 'status': 'create'}))
        return container
    except sqlalchemy.exc.IntegrityError:
        db.session.rollback()
        return None
def notify_agent(self, cid):
    """Publish an add-container event ('+|<cid>') on the watcher
    channel of the host running this task."""
    channel = ERU_AGENT_WATCHERKEY % self.task.host.name
    rds.publish(channel, '+|%s' % cid)
def store_and_broadcast(self, iterable):
    """Drain `iterable`, pushing every line to the redis log list and
    broadcasting it on the pub/sub channel.

    Returns the last line seen ('' when the iterable is empty) for
    consistency with the other store_and_broadcast implementation in
    this codebase; previously this variant returned None, so the change
    is backward compatible for existing callers.
    """
    line = ''
    for line in iterable:
        rds.rpush(self.log_key, line)
        rds.publish(self.publish_key, line)
    return line
def pub_build_finish(self):
    """Signal subscribers on the publish channel that the build log
    stream has ended."""
    rds.publish(self.publish_key, PUB_END_MESSAGE)
def pub_fail(self):
    """Broadcast a failed task result on the result channel."""
    rds.publish(self.result_key, TASK_RESULT_FAILED)
def pub_success(self):
    """Broadcast a successful task result on the result channel."""
    rds.publish(self.result_key, TASK_RESULT_SUCCESS)
def publish_to_service_discovery(*appnames):
    """Publish each app name on the service-discovery channel so
    watchers refresh their view of that app."""
    for app in appnames:
        rds.publish('eru:discovery:published', app)
def create_containers_with_macvlan_public(task_id, ncontainer, nshare, network_ids, spec_ips=None):
    """Run the task identified by task_id: deploy `ncontainer`
    containers (no core binding) and attach them to the subnets in
    `network_ids`.

    Per container: create it via docker, acquire IPs (specific ones if
    `spec_ips` is given), ask the host agent over pubsub to bind them,
    and register the container on success; on any failure the container
    is removed and its IPs released. Finishes by publishing to service
    discovery and marking the task successful.
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return
    if spec_ips is None:
        spec_ips = []
    networks = Network.get_multi(network_ids)
    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    # use raw
    image = task.props['image']
    # No core binding in the public variant: default CPU weight.
    cpu_shares = 1024
    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id
    cids = []
    for _ in range(ncontainer):
        try:
            cid, cname = dockerjob.create_one_container(host, version, entrypoint,
                                                        env, cores=None,
                                                        cpu_shares=cpu_shares,
                                                        image=image)
        except Exception as e:
            # Printed so it lands in the celery log (Python 2 print statement).
            print e
            continue
        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}
        if ips:
            ident_id = cname.split('_')[-1]
            values = [str(task_id), cid, ident_id] + \
                ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, '|'.join(values))
        # NOTE(review): reconstructed from flattened source -- this
        # for/else sits outside the `if ips:` block so a container with
        # no IPs still reaches the success branch; confirm against the
        # original layout.
        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)
        else:
            # for/else: all feedback OK -- register the container.
            current_flask.logger.info('Creating container (cid=%s, ips=%s)', cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint, {}, env, nshare)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # Skip the cleanup below.
            continue
        # Clean up the failed container: remove it and release its IPs.
        current_flask.logger.info('Cleaning failed container (cid=%s)', cid)
        dockerjob.remove_container_by_cid([cid], host)
        [ip.release() for ip in ips]
        # On failure the feedback key must be cleared as well.
        rds.delete(feedback_key)
    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
    current_flask.logger.info('Task<id=%s>: Done', task_id)
def create_containers_with_macvlan(task_id, ncontainer, nshare, cores, network_ids, spec_ips=None):
    """Run the task identified by task_id: deploy `ncontainer`
    containers, bind them to the given cores, and attach them to the
    subnets in `network_ids`.

    Per container: create it via docker with its slice of cores, acquire
    IPs (specific ones if `spec_ips` is given), notify the host agent
    (pubsub or http per ERU_AGENT_API) to bind them, and register the
    container on success; any failure removes the container and releases
    its cores and IPs. Finishes with service-discovery and falcon
    updates, then marks the task successful.
    """
    current_flask.logger.info('Task<id=%s>: Started', task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error('Task (id=%s) not found, quit', task_id)
        return
    if spec_ips is None:
        spec_ips = []
    need_network = bool(network_ids)
    networks = Network.get_multi(network_ids)
    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props['entrypoint']
    env = task.props['env']
    ports = task.props['ports']
    args = task.props['args']
    # use raw
    route = task.props['route']
    image = task.props['image']
    callback_url = task.props['callback_url']
    # CPU weight proportional to the shared-core fraction; 1024 is the
    # docker default when no share is requested.
    cpu_shares = int(float(nshare) / host.pod.core_share * 1024) if nshare else 1024
    pub_agent_vlan_key = 'eru:agent:%s:vlan' % host.name
    pub_agent_route_key = 'eru:agent:%s:route' % host.name
    feedback_key = 'eru:agent:%s:feedback' % task_id
    cids = []
    for fcores, pcores in _iter_cores(cores, ncontainer):
        cores_for_one_container = {'full': fcores, 'part': pcores}
        try:
            cid, cname = dockerjob.create_one_container(host, version, entrypoint,
                                                        env, fcores+pcores,
                                                        ports=ports, args=args,
                                                        cpu_shares=cpu_shares,
                                                        image=image,
                                                        need_network=need_network)
        except Exception as e:
            # Printed so it lands in the celery log.
            print e
            host.release_cores(cores_for_one_container, nshare)
            continue
        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}
        if ips:
            if ERU_AGENT_API == 'pubsub':
                values = [str(task_id), cid] + \
                    ['{0}:{1}'.format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
                rds.publish(pub_agent_vlan_key, '|'.join(values))
            elif ERU_AGENT_API == 'http':
                agent = get_agent(host)
                ip_list = [(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
                agent.add_container_vlan(cid, str(task_id), ip_list)
        # NOTE(review): reconstructed from flattened source -- this
        # for/else sits outside the `if ips:` block so a no-network
        # container still reaches the success branch; confirm layout.
        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split('|')
            if succ == '0':
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)
            if route:
                rds.publish(pub_agent_route_key, '%s|%s' % (cid, route))
        else:
            # for/else: all feedback OK -- register the container.
            current_flask.logger.info('Creating container (cid=%s, ips=%s)', cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint,
                                 cores_for_one_container, env, nshare, callback_url)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(c)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # Skip the cleanup below.
            continue
        # Clean up the failed container: remove it, release cores and IPs.
        current_flask.logger.info('Cleaning failed container (cid=%s)', cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores_for_one_container, nshare)
        [ip.release() for ip in ips]
        # On failure the feedback key must be cleared as well.
        rds.delete(feedback_key)
    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
    # These do I/O, so they run last.
    falcon_all_graphs(version)
    falcon_all_alarms(version)
    current_flask.logger.info('Task<id=%s>: Done', task_id)
def cure(self):
    """Mark this container alive again and broadcast an 'up' event on
    the app channel."""
    self.is_alive = 1
    db.session.add(self)
    db.session.commit()
    payload = json.dumps({'container': self.container_id, 'status': 'up'})
    rds.publish(_CONTAINER_PUB_KEY % self.appname, payload)
def create_containers_with_macvlan(task_id, ncontainer, nshare, cores, network_ids, spec_ips=None):
    """Run the task identified by task_id: deploy `ncontainer`
    containers, bind them to the given cores, and attach them to the
    subnets in `network_ids`.

    Per container: take a slice of the full/part core lists, create the
    container via docker, acquire IPs (specific ones if `spec_ips` is
    given), ask the host agent over pubsub to bind them, and register
    the container on success; any failure removes the container and
    releases its cores and IPs.
    """
    # TODO support part core
    current_flask.logger.info("Task<id=%s>: Started", task_id)
    task = Task.get(task_id)
    if not task:
        current_flask.logger.error("Task (id=%s) not found, quit", task_id)
        return
    if spec_ips is None:
        spec_ips = []
    networks = Network.get_multi(network_ids)
    notifier = TaskNotifier(task)
    host = task.host
    version = task.version
    entrypoint = task.props["entrypoint"]
    env = task.props["env"]
    # use raw
    image = task.props["image"]
    # CPU weight proportional to the shared-core fraction; 1024 is the
    # docker default when no share is requested.
    cpu_shares = int(float(nshare) / host.pod.core_share * 1024) if nshare else 1024
    pub_agent_vlan_key = "eru:agent:%s:vlan" % host.name
    feedback_key = "eru:agent:%s:feedback" % task_id
    cids = []
    full_cores, part_cores = cores.get("full", []), cores.get("part", [])
    # NOTE(review): Python-2 integer division -- if len(full_cores) or
    # len(part_cores) is smaller than ncontainer the chunk size becomes
    # 0; verify chunked() tolerates that.
    for fcores, pcores in izip_longest(
        chunked(full_cores, len(full_cores) / ncontainer),
        chunked(part_cores, len(part_cores) / ncontainer),
        fillvalue=[],
    ):
        cores_for_one_container = {"full": fcores, "part": pcores}
        try:
            cid, cname = dockerjob.create_one_container(
                host, version, entrypoint, env, fcores + pcores, cpu_shares, image=image
            )
        # NOTE(review): bare except also swallows KeyboardInterrupt /
        # SystemExit; `except Exception:` would be safer.
        except:
            host.release_cores(cores_for_one_container, nshare)
            continue
        if spec_ips:
            ips = [n.acquire_specific_ip(ip) for n, ip in zip(networks, spec_ips)]
        else:
            ips = [n.acquire_ip() for n in networks]
        ips = [i for i in ips if i]
        ip_dict = {ip.vlan_address: ip for ip in ips}
        if ips:
            ident_id = cname.split("_")[-1]
            values = [str(task_id), cid, ident_id] + \
                ["{0}:{1}".format(ip.vlan_seq_id, ip.vlan_address) for ip in ips]
            rds.publish(pub_agent_vlan_key, "|".join(values))
        # NOTE(review): reconstructed from flattened source -- this
        # for/else sits outside the `if ips:` block so a no-network
        # container still reaches the success branch; confirm layout.
        for _ in ips:
            # timeout 15s
            rv = rds.blpop(feedback_key, 15)
            if rv is None:
                break
            # rv is like (feedback_key, 'succ|container_id|vethname|ip')
            succ, _, vethname, vlan_address = rv[1].split("|")
            if succ == "0":
                break
            ip = ip_dict.get(vlan_address, None)
            if ip:
                ip.set_vethname(vethname)
        else:
            # for/else: all feedback OK -- register the container.
            current_flask.logger.info("Creating container (cid=%s, ips=%s)", cid, ips)
            c = Container.create(cid, host, version, cname, entrypoint,
                                 cores_for_one_container, env, nshare)
            for ip in ips:
                ip.assigned_to_container(c)
            notifier.notify_agent(cid)
            add_container_for_agent(c)
            add_container_backends(c)
            cids.append(cid)
            # Skip the cleanup below.
            continue
        # Clean up the failed container: remove it, release cores and IPs.
        current_flask.logger.info("Cleaning failed container (cid=%s)", cid)
        dockerjob.remove_container_by_cid([cid], host)
        host.release_cores(cores_for_one_container, nshare)
        [ip.release() for ip in ips]
        # On failure the feedback key must be cleared as well.
        rds.delete(feedback_key)
    publish_to_service_discovery(version.name)
    task.finish_with_result(consts.TASK_SUCCESS, container_ids=cids)
    notifier.pub_success()
    current_flask.logger.info("Task<id=%s>: Done", task_id)
def notify_agent(self, container):
    """Publish '+|<container_id>|<meta-json>' on the watcher channel of
    the task's host so its agent starts tracking the container. Does
    nothing for a falsy `container`."""
    if not container:
        return
    payload = '+|%s|%s' % (container.container_id, json.dumps(container.meta))
    rds.publish(ERU_AGENT_WATCHERKEY % self.task.host.name, payload)
def cure(self):
    """Flip this container back to alive and announce an 'up' event."""
    self.is_alive = 1
    db.session.add(self)
    db.session.commit()
    event = {"container": self.container_id, "status": "up"}
    rds.publish(_CONTAINER_PUB_KEY % self.appname, json.dumps(event))