async def resolve_live_stat(self, info):
    """Resolve the live statistics stored under this object's id.

    The value is looked up in ``info.context['redis_stat']`` under
    ``str(self.id)``, with the lookup routed through
    ``redis.execute_with_retries``.

    Returns:
        The msgpack-decoded value, or ``None`` when nothing is stored.
    """
    stat_store = info.context['redis_stat']

    def _fetch():
        # Evaluated on every invocation by the retry helper.
        return stat_store.get(str(self.id), encoding=None)

    packed = await redis.execute_with_retries(_fetch)
    if packed is None:
        return None
    return msgpack.unpackb(packed)
async def query_owned_dotfiles(
    conn: SAConnection,
    access_key: AccessKey,
) -> Tuple[List[Dotfile], int]:
    """Load the dotfiles stored for the keypair identified by *access_key*.

    Returns:
        A pair of the msgpack-decoded ``keypairs.dotfiles`` value and the
        difference between ``MAXIMUM_DOTFILE_SIZE`` and the length of the
        packed value.
    """
    stmt = (
        sa.select([keypairs.c.dotfiles])
        .select_from(keypairs)
        .where(keypairs.c.access_key == access_key)
    )
    blob = await conn.scalar(stmt)
    dotfiles = msgpack.unpackb(blob)
    leftover = MAXIMUM_DOTFILE_SIZE - len(blob)
    return dotfiles, leftover
async def feed_and_get_status(self):
    """Send a ``status`` request and return the decoded reply.

    Raises:
        asyncio.CancelledError: if ``self.input_sock`` is already closed.

    Returns:
        The msgpack-decoded item taken from ``self.status_queue``, or
        ``None`` when waiting for it is cancelled.
    """
    sock = self.input_sock
    if sock.closed:
        raise asyncio.CancelledError
    request = [b'status', b'']
    await sock.send_multipart(request)
    try:
        packed = await self.status_queue.get()
        # Mark the item as processed before decoding it.
        self.status_queue.task_done()
        return msgpack.unpackb(packed)
    except asyncio.CancelledError:
        return None
async def _resolve_live_stat(
    cls,
    redis_stat: Redis,
    kernel_id: str,
) -> Optional[Mapping[str, Any]]:
    """Fetch and decode the value stored under *kernel_id* in *redis_stat*.

    The lookup goes through ``redis.execute_with_retries``.

    Returns:
        The msgpack-decoded value, or ``None`` when the key has no value.
    """
    def _fetch():
        return redis_stat.get(kernel_id, encoding=None)

    packed = await redis.execute_with_retries(_fetch)
    return None if packed is None else msgpack.unpackb(packed)
async def resolve_mem_cur_bytes(self, info):
    """Resolve ``node.mem.current`` from the stored live statistics.

    Returns:
        ``int(stat['node']['mem']['current'])``, or ``0`` when no value is
        stored or the decoded value lacks a usable entry at that path.
    """
    stat_store = info.context['redis_stat']
    packed = await redis.execute_with_retries(
        lambda: stat_store.get(str(self.id), encoding=None))
    if packed is None:
        return 0
    stat = msgpack.unpackb(packed)
    try:
        current = stat['node']['mem']['current']
        return int(current)
    except (KeyError, TypeError, ValueError):
        # Missing keys or a non-numeric value fall back to zero.
        return 0
async def query_domain_dotfiles(
    conn: SAConnection,
    name: str,
) -> Tuple[Union[List[DomainDotfile], None], Union[int, None]]:
    """Load the dotfiles stored for the domain called *name*.

    Returns:
        ``(None, None)`` when the query yields no value; otherwise a pair of
        the msgpack-decoded ``domains.dotfiles`` value and the difference
        between ``MAXIMUM_DOTFILE_SIZE`` and the packed length.
    """
    stmt = (
        sa.select([domains.c.dotfiles])
        .select_from(domains)
        .where(domains.c.name == name)
    )
    blob = await conn.scalar(stmt)
    if blob is None:
        return None, None
    dotfiles = msgpack.unpackb(blob)
    leftover = MAXIMUM_DOTFILE_SIZE - len(blob)
    return dotfiles, leftover
async def query_group_dotfiles(
    conn: SAConnection,
    group_id: Union[GUID, uuid.UUID],
) -> Tuple[Union[List[GroupDotfile], None], Union[int, None]]:
    """Load the dotfiles stored for the group identified by *group_id*.

    Returns:
        ``(None, None)`` when the query yields no value; otherwise a pair of
        the msgpack-decoded ``groups.dotfiles`` value and the difference
        between ``MAXIMUM_DOTFILE_SIZE`` and the packed length.
    """
    stmt = (
        sa.select([groups.c.dotfiles])
        .select_from(groups)
        .where(groups.c.id == group_id)
    )
    blob = await conn.scalar(stmt)
    if blob is None:
        return None, None
    dotfiles = msgpack.unpackb(blob)
    leftover = MAXIMUM_DOTFILE_SIZE - len(blob)
    return dotfiles, leftover
async def resolve_cpu_cur_pct(self, info):
    """Resolve ``node.cpu_util.pct`` from the stored live statistics.

    Returns:
        ``float(stat['node']['cpu_util']['pct'])``, or ``0.0`` when no value
        is stored or the decoded value lacks a usable entry at that path.
    """
    stat_store = info.context['redis_stat']
    packed = await redis.execute_with_retries(
        lambda: stat_store.get(str(self.id), encoding=None))
    if packed is None:
        return 0.0
    stat = msgpack.unpackb(packed)
    try:
        pct = stat['node']['cpu_util']['pct']
        return float(pct)
    except (KeyError, TypeError, ValueError):
        # Missing keys or a non-numeric value fall back to zero.
        return 0.0
async def _consume(self) -> None:
    """Pop events from the ``events.prodcons`` list and dispatch them.

    Loops until cancelled. Any other exception raised while popping,
    decoding or dispatching is logged and the loop continues.
    """
    def _pop_next():
        return self.redis_consumer.blpop('events.prodcons')

    while True:
        try:
            _key, payload = await redis.execute_with_retries(_pop_next)
            event = msgpack.unpackb(payload)
            await self.dispatch_consumers(
                event['event_name'],
                event['agent_id'],
                event['args'],
            )
        except asyncio.CancelledError:
            # Cancellation ends the consumer loop.
            return
        except Exception:
            log.exception('EventDispatcher.consume(): unexpected-error')
async def resolve_live_stat(self, info: graphene.ResolveInfo):
    """Resolve statistics, choosing between the live and the last-known value.

    Returns:
        ``None`` when the object has no ``status`` attribute; the
        msgpack-decoded value stored under ``str(self.id)`` (or ``None`` if
        absent) when ``self.status`` is in ``LIVE_STATUS``; otherwise
        ``self.last_stat``.
    """
    if not hasattr(self, 'status'):
        return None
    # Looked up before the status check, as in the original flow.
    stat_store = info.context['redis_stat']
    if self.status not in LIVE_STATUS:
        return self.last_stat
    packed = await redis.execute_with_retries(
        lambda: stat_store.get(str(self.id), encoding=None))
    if packed is None:
        return None
    return msgpack.unpackb(packed)
async def event_subscriber(dispatcher):
    """Read events from the PULL socket at ``EVENT_IPC_ADDR`` and dispatch them.

    Each message is read as three frames: the event name (ASCII), the agent
    id (UTF-8) and a msgpack-encoded argument payload. They are passed to
    ``dispatcher.dispatch(event_name, agent_id, args)``.

    The loop ends silently on cancellation. Any other ``Exception`` is
    logged and also ends the loop. The socket is closed in every case.
    """
    event_sock = await aiozmq.create_zmq_stream(
        zmq.PULL, connect=EVENT_IPC_ADDR)
    try:
        while True:
            data = await event_sock.read()
            event_name = data[0].decode('ascii')
            agent_id = data[1].decode('utf8')
            args = msgpack.unpackb(data[2])
            dispatcher.dispatch(event_name, agent_id, args)
    except asyncio.CancelledError:
        pass
    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt,
        # SystemExit and GeneratorExit propagate instead of being swallowed.
        log.exception('unexpected error')
    finally:
        event_sock.close()
async def event_subscriber(dispatcher):
    """Receive events from the PULL socket at ``EVENT_IPC_ADDR`` and dispatch them.

    Each message is read as three frames: the event name (ASCII), the agent
    id (UTF-8) and a msgpack-encoded argument payload. The loop stops on an
    empty message or on cancellation. Other exceptions are logged and the
    loop resumes. The socket and context are released on exit.
    """
    ctx = zmq.asyncio.Context()
    event_sock = ctx.socket(zmq.PULL)
    event_sock.connect(EVENT_IPC_ADDR)
    try:
        running = True
        while running:
            try:
                frames = await event_sock.recv_multipart()
                if frames:
                    event_name = frames[0].decode('ascii')
                    agent_id = frames[1].decode('utf8')
                    args = msgpack.unpackb(frames[2])
                    await dispatcher.dispatch(event_name, agent_id, args)
                else:
                    # An empty message terminates the subscriber.
                    running = False
            except asyncio.CancelledError:
                running = False
            except Exception:
                log.exception('unexpected error -- resuming operation')
    finally:
        event_sock.close()
        ctx.term()
async def handle_heartbeat(self, agent_id, agent_info):
    """Process a heartbeat from an agent.

    Steps, in order:

    1. Record the current UTC timestamp for ``agent_id`` in the
       ``last_seen`` hash of ``self.redis_live``.
    2. Inside a DB transaction, select the agent row with
       ``for_update=True`` and compute the reported mem/cpu/gpu slots by
       multiplying the agent's values with the overbook factors obtained
       from ``self.config_server``. Then:

       * no row (or a row with no status): insert a new ``ALIVE`` agent;
       * ``ALIVE``: update only the slot columns whose values changed;
       * ``LOST`` / ``TERMINATED``: mark the agent ``ALIVE`` again and
         refresh its region, address and slots;
       * any other status: log an error.
    3. Decode ``agent_info['images']`` (snappy-compressed msgpack) and add
       ``agent_id`` to the ``self.redis_image`` set keyed by the first
       element of each image entry.

    Args:
        agent_id: Identifier of the reporting agent.
        agent_info: Mapping read for the keys ``mem_slots``, ``cpu_slots``,
            ``gpu_slots``, ``region``, ``addr`` and ``images``.
    """
    now = datetime.now(tzutc())

    # Update "last seen" timestamp for liveness tracking
    await self.redis_live.hset('last_seen', agent_id, now.timestamp())

    # Check and update status of the agent record in DB
    async with self.dbpool.acquire() as conn, conn.begin():
        # TODO: check why sa.column('status') does not work
        query = (sa.select([
            agents.c.status,
            agents.c.mem_slots,
            agents.c.cpu_slots,
            agents.c.gpu_slots
        ], for_update=True).select_from(agents).where(
            agents.c.id == agent_id))
        result = await conn.execute(query)
        row = await result.first()
        ob_factors = await self.config_server.get_overbook_factors()
        # Reported capacity = agent-reported value scaled by the overbook
        # factor; memory is truncated to int, cpu/gpu are kept as floats.
        reported_mem_slots = int(
            Decimal(agent_info['mem_slots']) * Decimal(ob_factors['mem']))
        reported_cpu_slots = float(
            Decimal(agent_info['cpu_slots']) * Decimal(ob_factors['cpu']))
        reported_gpu_slots = float(
            Decimal(agent_info['gpu_slots']) * Decimal(ob_factors['gpu']))
        if row is None or row.status is None:
            # new agent detected!
            log.info('agent {0} joined!', agent_id)
            query = agents.insert().values({
                'id': agent_id,
                'status': AgentStatus.ALIVE,
                'region': agent_info['region'],
                'mem_slots': reported_mem_slots,
                'cpu_slots': reported_cpu_slots,
                'gpu_slots': reported_gpu_slots,
                'used_mem_slots': 0,
                'used_cpu_slots': 0,
                'used_gpu_slots': 0,
                'addr': agent_info['addr'],
                'first_contact': now,
                'lost_at': None,
            })
            result = await conn.execute(query)
            assert result.rowcount == 1
        elif row.status == AgentStatus.ALIVE:
            # Known live agent: write back only the slot values that differ.
            changed_cols = {}
            if row.mem_slots != reported_mem_slots:
                changed_cols['mem_slots'] = reported_mem_slots
            if row.cpu_slots != reported_cpu_slots:
                changed_cols['cpu_slots'] = reported_cpu_slots
            if row.gpu_slots != reported_gpu_slots:
                changed_cols['gpu_slots'] = reported_gpu_slots
            if changed_cols:
                query = (sa.update(agents).values(changed_cols).where(
                    agents.c.id == agent_id))
                await conn.execute(query)
        elif row.status in (AgentStatus.LOST, AgentStatus.TERMINATED):
            # A previously lost/terminated agent is reporting again.
            log.warning('agent {0} revived!', agent_id)
            query = (sa.update(agents).values({
                'status': AgentStatus.ALIVE,
                'region': agent_info['region'],
                'addr': agent_info['addr'],
                'lost_at': None,
                'mem_slots': reported_mem_slots,
                'cpu_slots': reported_cpu_slots,
                'gpu_slots': reported_gpu_slots,
            }).where(agents.c.id == agent_id))
            await conn.execute(query)
        else:
            log.error('should not reach here! {0}', type(row.status))

    # Update the mapping of kernel images to agents.
    # NOTE(review): placed after the DB transaction block here; the nesting
    # could not be recovered from the collapsed source -- confirm.
    images = msgpack.unpackb(snappy.decompress(agent_info['images']))
    pipe = self.redis_image.pipeline()
    for image in images:
        pipe.sadd(image[0], agent_id)
    await pipe.execute()
async def _subscribe_impl():
    """Subscribe to ``events.pubsub`` and dispatch every received event.

    ``self`` is a free variable taken from the enclosing scope. Each
    message is msgpack-decoded and its ``event_name``, ``agent_id`` and
    ``args`` entries are passed to ``self.dispatch_subscribers``.
    """
    subscribed = await self.redis_subscriber.subscribe('events.pubsub')
    events_channel = subscribed[0]
    async for payload in events_channel.iter():
        event = msgpack.unpackb(payload)
        await self.dispatch_subscribers(
            event['event_name'],
            event['agent_id'],
            event['args'],
        )
async def resolve_mem_cur_bytes(self, info):
    """Resolve ``node.mem.current`` from the stored live statistics.

    The packed value is read from ``info.context['redis_stat']`` under
    ``str(self.id)`` and msgpack-decoded.

    Returns:
        ``float(stat['node']['mem']['current'])``, or ``0.0`` when no value
        is stored or the decoded value lacks a usable entry at that path.
    """
    rs = info.context['redis_stat']
    packed = await rs.get(str(self.id), encoding=None)
    if packed is None:
        # No value stored under this id: report zero instead of
        # subscripting ``None``.
        return 0.0
    live_stat = msgpack.unpackb(packed)
    try:
        return float(live_stat['node']['mem']['current'])
    except (KeyError, TypeError, ValueError):
        # Missing keys or a non-numeric value fall back to zero.
        return 0.0
async def resolve_cpu_cur_pct(self, info):
    """Resolve ``node.cpu_util.pct`` from the stored live statistics.

    The packed value is read from ``info.context['redis_stat']`` under
    ``str(self.id)`` and msgpack-decoded.

    Returns:
        ``float(stat['node']['cpu_util']['pct'])``, or ``0.0`` when no value
        is stored or the decoded value lacks a usable entry at that path.
    """
    rs = info.context['redis_stat']
    packed = await rs.get(str(self.id), encoding=None)
    if packed is None:
        # No value stored under this id: report zero instead of
        # subscripting ``None``.
        return 0.0
    live_stat = msgpack.unpackb(packed)
    try:
        return float(live_stat['node']['cpu_util']['pct'])
    except (KeyError, TypeError, ValueError):
        # Missing keys or a non-numeric value fall back to zero.
        return 0.0
async def _resolve_live_stat(cls, redis_stat, kernel_id):
    """Fetch and decode the value stored under *kernel_id* in *redis_stat*.

    Returns:
        The msgpack-decoded value, or ``None`` when the key has no value.
    """
    packed = await redis_stat.get(kernel_id, encoding=None)
    if packed is None:
        return None
    return msgpack.unpackb(packed)
async def recv_deserialized(sock):
    """Receive one multipart message from *sock* and msgpack-decode each frame.

    Returns:
        A list with one decoded object per received frame, in order.
    """
    frames = await sock.recv_multipart()
    decoded = []
    for frame in frames:
        decoded.append(msgpack.unpackb(frame))
    return decoded
async def _resolve_live_stat(cls, redis_stat, kernel_id):
    """Fetch and decode the value stored under *kernel_id* in *redis_stat*.

    The lookup goes through ``redis.execute_with_retries``.

    Returns:
        The msgpack-decoded value, or ``None`` when the key has no value.
    """
    def _fetch():
        return redis_stat.get(kernel_id, encoding=None)

    packed = await redis.execute_with_retries(_fetch)
    if packed is None:
        return None
    return msgpack.unpackb(packed)
async def resolve_live_stat(self, info):
    """Resolve the live statistics stored under this object's id.

    The value is read from ``info.context['redis_stat']`` under
    ``str(self.id)``.

    Returns:
        The msgpack-decoded value, or ``None`` when nothing is stored.
    """
    stat_store = info.context['redis_stat']
    packed = await stat_store.get(str(self.id), encoding=None)
    if packed is None:
        return None
    return msgpack.unpackb(packed)