Example #1
0
 async def resolve_live_stat(self, info):
     """Return the decoded live statistics stored for this object.

     The value is read from the ``redis_stat`` entry of ``info.context``
     under the key ``str(self.id)`` (through ``redis.execute_with_retries``)
     and unpacked with msgpack.

     Returns ``None`` when the key holds no value.
     """
     stat_store = info.context['redis_stat']
     packed = await redis.execute_with_retries(
         lambda: stat_store.get(str(self.id), encoding=None))
     if packed is None:
         return None
     return msgpack.unpackb(packed)
Example #2
0
async def query_owned_dotfiles(
    conn: SAConnection,
    access_key: AccessKey,
) -> Tuple[List[Dotfile], int]:
    """Load the dotfiles stored for the keypair identified by *access_key*.

    Args:
        conn: Database connection used to run the query.
        access_key: Access key whose ``keypairs.dotfiles`` column is read.

    Returns:
        A ``(dotfiles, remaining)`` pair: the msgpack-decoded dotfile list
        and ``MAXIMUM_DOTFILE_SIZE`` minus the size of the packed blob.
        When the query yields no value, an empty list and the full
        ``MAXIMUM_DOTFILE_SIZE`` are returned.
    """
    query = (
        sa.select([keypairs.c.dotfiles])
        .select_from(keypairs)
        .where(keypairs.c.access_key == access_key)
    )
    packed_dotfile = await conn.scalar(query)
    if packed_dotfile is None:
        # scalar() gives None when there is no matching row (or the column is
        # NULL); unpacking or measuring None would raise TypeError.
        return [], MAXIMUM_DOTFILE_SIZE
    rows = msgpack.unpackb(packed_dotfile)
    return rows, MAXIMUM_DOTFILE_SIZE - len(packed_dotfile)
Example #3
0
 async def feed_and_get_status(self):
     """Send a ``status`` request and return the decoded reply.

     Raises ``asyncio.CancelledError`` right away when the input socket is
     already closed.  Otherwise a two-frame ``status`` message is sent, the
     next item of ``self.status_queue`` is taken and msgpack-decoded.

     Returns ``None`` if the wait for the reply is cancelled.
     """
     if self.input_sock.closed:
         raise asyncio.CancelledError
     status_request = [b'status', b'']
     await self.input_sock.send_multipart(status_request)
     try:
         packed = await self.status_queue.get()
         self.status_queue.task_done()
         status = msgpack.unpackb(packed)
     except asyncio.CancelledError:
         return None
     return status
Example #4
0
 async def _resolve_live_stat(
     cls,
     redis_stat: Redis,
     kernel_id: str,
 ) -> Optional[Mapping[str, Any]]:
     """Read and decode the live statistics stored under *kernel_id*.

     The raw value is fetched from *redis_stat* via
     ``redis.execute_with_retries`` and unpacked with msgpack.

     Returns ``None`` when the key holds no value.
     """
     packed = await redis.execute_with_retries(
         lambda: redis_stat.get(kernel_id, encoding=None))
     if packed is None:
         return None
     return msgpack.unpackb(packed)
Example #5
0
 async def resolve_mem_cur_bytes(self, info):
     """Return ``node.mem.current`` from this object's live statistics as an int.

     The statistics are read from the ``redis_stat`` entry of
     ``info.context`` under the key ``str(self.id)`` and msgpack-decoded.

     Returns ``0`` when there is no stored value, or when the expected
     entry is missing or cannot be converted to ``int``.
     """
     stat_store = info.context['redis_stat']
     packed = await redis.execute_with_retries(
         lambda: stat_store.get(str(self.id), encoding=None))
     if packed is None:
         return 0
     stats = msgpack.unpackb(packed)
     try:
         current = stats['node']['mem']['current']
         return int(current)
     except (KeyError, TypeError, ValueError):
         return 0
Example #6
0
async def query_domain_dotfiles(
    conn: SAConnection,
    name: str,
) -> Tuple[Union[List[DomainDotfile], None], Union[int, None]]:
    """Load the dotfiles stored for the domain called *name*.

    Returns ``(None, None)`` when the query yields no value; otherwise the
    msgpack-decoded dotfiles together with ``MAXIMUM_DOTFILE_SIZE`` minus
    the size of the packed blob.
    """
    stmt = (
        sa.select([domains.c.dotfiles])
        .select_from(domains)
        .where(domains.c.name == name)
    )
    blob = await conn.scalar(stmt)
    if blob is None:
        return None, None
    remaining = MAXIMUM_DOTFILE_SIZE - len(blob)
    return msgpack.unpackb(blob), remaining
Example #7
0
async def query_group_dotfiles(
    conn: SAConnection,
    group_id: Union[GUID, uuid.UUID],
) -> Tuple[Union[List[GroupDotfile], None], Union[int, None]]:
    """Load the dotfiles stored for the group identified by *group_id*.

    Returns ``(None, None)`` when the query yields no value; otherwise the
    msgpack-decoded dotfiles together with ``MAXIMUM_DOTFILE_SIZE`` minus
    the size of the packed blob.
    """
    stmt = (
        sa.select([groups.c.dotfiles])
        .select_from(groups)
        .where(groups.c.id == group_id)
    )
    blob = await conn.scalar(stmt)
    if blob is None:
        return None, None
    remaining = MAXIMUM_DOTFILE_SIZE - len(blob)
    return msgpack.unpackb(blob), remaining
Example #8
0
 async def resolve_cpu_cur_pct(self, info):
     """Return ``node.cpu_util.pct`` from this object's live statistics as a float.

     The statistics are read from the ``redis_stat`` entry of
     ``info.context`` under the key ``str(self.id)`` and msgpack-decoded.

     Returns ``0.0`` when there is no stored value, or when the expected
     entry is missing or cannot be converted to ``float``.
     """
     stat_store = info.context['redis_stat']
     packed = await redis.execute_with_retries(
         lambda: stat_store.get(str(self.id), encoding=None))
     if packed is None:
         return 0.0
     stats = msgpack.unpackb(packed)
     try:
         pct = stats['node']['cpu_util']['pct']
         return float(pct)
     except (KeyError, TypeError, ValueError):
         return 0.0
Example #9
0
 async def _consume(self) -> None:
     """Pop events from the ``events.prodcons`` list and dispatch them, forever.

     Each popped payload is msgpack-decoded and its ``event_name``,
     ``agent_id`` and ``args`` entries are handed to
     ``self.dispatch_consumers``.  The loop ends on cancellation; any other
     exception is logged and the loop carries on with the next event.
     """
     while True:
         try:
             _key, packed = await redis.execute_with_retries(
                 lambda: self.redis_consumer.blpop('events.prodcons'))
             event = msgpack.unpackb(packed)
             await self.dispatch_consumers(
                 event['event_name'], event['agent_id'], event['args'])
         except asyncio.CancelledError:
             # Cancellation is the only way out of the loop.
             return
         except Exception:
             log.exception('EventDispatcher.consume(): unexpected-error')
Example #10
0
 async def resolve_live_stat(self, info: graphene.ResolveInfo):
     """Return live statistics while the status is live, else ``self.last_stat``.

     * No ``status`` attribute on the object: ``None``.
     * ``self.status`` in ``LIVE_STATUS``: the msgpack-decoded value stored
       under ``str(self.id)`` in the ``redis_stat`` entry of
       ``info.context``, or ``None`` when nothing is stored.
     * Any other status: ``self.last_stat``.
     """
     if not hasattr(self, 'status'):
         return None
     stat_store = info.context['redis_stat']
     if self.status not in LIVE_STATUS:
         return self.last_stat
     packed = await redis.execute_with_retries(
         lambda: stat_store.get(str(self.id), encoding=None))
     if packed is None:
         return None
     return msgpack.unpackb(packed)
Example #11
0
async def event_subscriber(dispatcher):
    """Read events from the ZeroMQ PULL stream and pass them to *dispatcher*.

    Every message is read as three frames: an ASCII event name, a UTF-8
    agent id and a msgpack-encoded argument payload.  They are decoded and
    forwarded through ``dispatcher.dispatch(event_name, agent_id, args)``.

    The loop stops silently on cancellation.  Any other ``Exception`` is
    logged and also ends the loop.  The stream is closed on every exit path.
    """
    event_sock = await aiozmq.create_zmq_stream(
        zmq.PULL, connect=EVENT_IPC_ADDR)
    try:
        while True:
            data = await event_sock.read()
            event_name = data[0].decode('ascii')
            agent_id = data[1].decode('utf8')
            args = msgpack.unpackb(data[2])
            dispatcher.dispatch(event_name, agent_id, args)
    except asyncio.CancelledError:
        pass
    except Exception:
        # Deliberately not a bare ``except:`` -- KeyboardInterrupt and
        # SystemExit must propagate instead of being logged and swallowed.
        log.exception('unexpected error')
    finally:
        event_sock.close()
Example #12
0
async def event_subscriber(dispatcher):
    """Receive events on a ZeroMQ PULL socket and dispatch them until cancelled.

    Every multipart message is read as an ASCII event name, a UTF-8 agent
    id and a msgpack-encoded argument payload, which are awaited through
    ``dispatcher.dispatch(event_name, agent_id, args)``.

    An empty message or cancellation ends the loop.  Any other exception is
    logged and receiving resumes.  The socket is closed and the context
    terminated on exit.
    """
    zctx = zmq.asyncio.Context()
    event_sock = zctx.socket(zmq.PULL)
    event_sock.connect(EVENT_IPC_ADDR)
    try:
        while True:
            try:
                frames = await event_sock.recv_multipart()
                if not frames:
                    break
                await dispatcher.dispatch(
                    frames[0].decode('ascii'),
                    frames[1].decode('utf8'),
                    msgpack.unpackb(frames[2]),
                )
            except asyncio.CancelledError:
                break
            except Exception:
                log.exception('unexpected error -- resuming operation')
    finally:
        event_sock.close()
        zctx.term()
Example #13
0
    async def handle_heartbeat(self, agent_id, agent_info):
        """Process one heartbeat reported by an agent.

        Three things happen, in order:

        1. The current UTC timestamp is stored in the ``last_seen`` hash of
           ``self.redis_live`` under ``agent_id``.
        2. Inside a DB transaction the agent's row in ``agents`` is selected
           (with ``for_update=True``) and then inserted, updated or revived
           depending on its current status.
        3. Every image listed in ``agent_info['images']`` gets ``agent_id``
           added to its set in ``self.redis_image``.

        ``agent_info`` is read for the keys ``mem_slots``, ``cpu_slots``,
        ``gpu_slots``, ``region``, ``addr`` and ``images``.
        """

        now = datetime.now(tzutc())

        # Update "last seen" timestamp for liveness tracking
        await self.redis_live.hset('last_seen', agent_id, now.timestamp())

        # Check and update status of the agent record in DB
        async with self.dbpool.acquire() as conn, conn.begin():
            # TODO: check why sa.column('status') does not work
            query = (sa.select([
                agents.c.status, agents.c.mem_slots, agents.c.cpu_slots,
                agents.c.gpu_slots
            ],
                               for_update=True).select_from(agents).where(
                                   agents.c.id == agent_id))
            result = await conn.execute(query)
            row = await result.first()
            # Scale the reported capacities by the configured overbook
            # factors; memory is truncated to int, CPU and GPU stay float.
            ob_factors = await self.config_server.get_overbook_factors()
            reported_mem_slots = int(
                Decimal(agent_info['mem_slots']) * Decimal(ob_factors['mem']))
            reported_cpu_slots = float(
                Decimal(agent_info['cpu_slots']) * Decimal(ob_factors['cpu']))
            reported_gpu_slots = float(
                Decimal(agent_info['gpu_slots']) * Decimal(ob_factors['gpu']))
            if row is None or row.status is None:
                # new agent detected!
                log.info('agent {0} joined!', agent_id)
                query = agents.insert().values({
                    'id': agent_id,
                    'status': AgentStatus.ALIVE,
                    'region': agent_info['region'],
                    'mem_slots': reported_mem_slots,
                    'cpu_slots': reported_cpu_slots,
                    'gpu_slots': reported_gpu_slots,
                    'used_mem_slots': 0,
                    'used_cpu_slots': 0,
                    'used_gpu_slots': 0,
                    'addr': agent_info['addr'],
                    'first_contact': now,
                    'lost_at': None,
                })
                result = await conn.execute(query)
                assert result.rowcount == 1
            elif row.status == AgentStatus.ALIVE:
                # Known live agent: write back only the slot columns whose
                # reported value differs from the stored one.
                changed_cols = {}
                if row.mem_slots != reported_mem_slots:
                    changed_cols['mem_slots'] = reported_mem_slots
                if row.cpu_slots != reported_cpu_slots:
                    changed_cols['cpu_slots'] = reported_cpu_slots
                if row.gpu_slots != reported_gpu_slots:
                    changed_cols['gpu_slots'] = reported_gpu_slots
                if changed_cols:
                    query = (sa.update(agents).values(changed_cols).where(
                        agents.c.id == agent_id))
                    await conn.execute(query)
            elif row.status in (AgentStatus.LOST, AgentStatus.TERMINATED):
                # A lost/terminated agent is reporting again: mark it alive,
                # clear lost_at and refresh its region, address and slots.
                log.warning('agent {0} revived!', agent_id)
                query = (sa.update(agents).values({
                    'status':
                    AgentStatus.ALIVE,
                    'region':
                    agent_info['region'],
                    'addr':
                    agent_info['addr'],
                    'lost_at':
                    None,
                    'mem_slots':
                    reported_mem_slots,
                    'cpu_slots':
                    reported_cpu_slots,
                    'gpu_slots':
                    reported_gpu_slots,
                }).where(agents.c.id == agent_id))
                await conn.execute(query)
            else:
                log.error('should not reach here! {0}', type(row.status))

        # Update the mapping of kernel images to agents.
        # The image list arrives snappy-compressed and msgpack-encoded; the
        # first element of each entry is used as the Redis set key.
        images = msgpack.unpackb(snappy.decompress(agent_info['images']))
        pipe = self.redis_image.pipeline()
        for image in images:
            pipe.sadd(image[0], agent_id)
        await pipe.execute()
Example #14
0
 async def _subscribe_impl():
     """Forward every message published on ``events.pubsub`` to the subscribers.

     Each message is msgpack-decoded and its ``event_name``, ``agent_id``
     and ``args`` entries are passed to ``self.dispatch_subscribers``.
     """
     subscribed = await self.redis_subscriber.subscribe('events.pubsub')
     channel = subscribed[0]
     async for packed in channel.iter():
         event = msgpack.unpackb(packed)
         await self.dispatch_subscribers(
             event['event_name'], event['agent_id'], event['args'])
Example #15
0
 async def resolve_mem_cur_bytes(self, info):
     """Return ``node.mem.current`` from this object's live statistics as a float.

     The statistics are read from the ``redis_stat`` entry of
     ``info.context`` under the key ``str(self.id)`` and msgpack-decoded.

     Returns ``0.0`` when nothing is stored for the key, or when the
     expected entry is missing or cannot be converted to ``float``.
     """
     rs = info.context['redis_stat']
     raw_stat = await rs.get(str(self.id), encoding=None)
     if raw_stat is None:
         # Nothing recorded yet; subscripting None would raise TypeError.
         return 0.0
     live_stat = msgpack.unpackb(raw_stat)
     try:
         return float(live_stat['node']['mem']['current'])
     except (KeyError, TypeError, ValueError):
         return 0.0
Example #16
0
 async def resolve_cpu_cur_pct(self, info):
     """Return ``node.cpu_util.pct`` from this object's live statistics as a float.

     The statistics are read from the ``redis_stat`` entry of
     ``info.context`` under the key ``str(self.id)`` and msgpack-decoded.

     Returns ``0.0`` when nothing is stored for the key, or when the
     expected entry is missing or cannot be converted to ``float``.
     """
     rs = info.context['redis_stat']
     raw_stat = await rs.get(str(self.id), encoding=None)
     if raw_stat is None:
         # Nothing recorded yet; subscripting None would raise TypeError.
         return 0.0
     live_stat = msgpack.unpackb(raw_stat)
     try:
         return float(live_stat['node']['cpu_util']['pct'])
     except (KeyError, TypeError, ValueError):
         return 0.0
Example #17
0
 async def _resolve_live_stat(cls, redis_stat, kernel_id):
     """Read and decode the live statistics stored under *kernel_id*.

     Returns the msgpack-decoded value fetched from *redis_stat*, or
     ``None`` when the key holds no value.
     """
     packed = await redis_stat.get(kernel_id, encoding=None)
     if packed is None:
         return None
     return msgpack.unpackb(packed)
Example #18
0
async def recv_deserialized(sock):
    """Receive one multipart message from *sock* and msgpack-decode each frame.

    Returns a list with one decoded object per received frame, in order.
    """
    frames = await sock.recv_multipart()
    decoded = []
    for frame in frames:
        decoded.append(msgpack.unpackb(frame))
    return decoded
Example #19
0
 async def _resolve_live_stat(cls, redis_stat, kernel_id):
     """Read and decode the live statistics stored under *kernel_id*.

     The raw value is fetched from *redis_stat* via
     ``redis.execute_with_retries`` and unpacked with msgpack.

     Returns ``None`` when the key holds no value.
     """
     packed = await redis.execute_with_retries(
         lambda: redis_stat.get(kernel_id, encoding=None))
     if packed is None:
         return None
     return msgpack.unpackb(packed)
Example #20
0
 async def resolve_live_stat(self, info):
     """Return the decoded live statistics stored for this object.

     The value is read from the ``redis_stat`` entry of ``info.context``
     under the key ``str(self.id)`` and unpacked with msgpack.

     Returns ``None`` when the key holds no value.
     """
     stat_store = info.context['redis_stat']
     packed = await stat_store.get(str(self.id), encoding=None)
     if packed is None:
         return None
     return msgpack.unpackb(packed)