Example #1
    def _dump_messages(self, messages, target):
        """
        Helper method to iterate the messages from a GetHistoryRequest
        and dump them into the Dumper, mostly to avoid excessive nesting.

        Also enqueues any media to be downloaded later by a different coroutine.
        """
        for m in messages:
            if isinstance(m, types.Message):
                media_id = self.dumper.dump_media(m.media)
                if media_id and self._check_media(m.media):
                    self.enqueue_media(media_id, utils.get_peer_id(target),
                                       m.from_id, m.date)

                self.dumper.dump_message(message=m,
                                         context_id=utils.get_peer_id(target),
                                         forward_id=self.dumper.dump_forward(
                                             m.fwd_from),
                                         media_id=media_id)
            elif isinstance(m, types.MessageService):
                if isinstance(m.action, types.MessageActionChatEditPhoto):
                    media_id = self.dumper.dump_media(m.action.photo)
                    self.enqueue_photo(m.action.photo,
                                       media_id,
                                       target,
                                       peer_id=m.from_id,
                                       date=m.date)
                else:
                    media_id = None
                self.dumper.dump_message_service(
                    message=m,
                    context_id=utils.get_peer_id(target),
                    media_id=media_id)
Example #2
    async def download_past_media(self, dumper, target_id):
        """
        Downloads the past media that has already been dumped into the
        database but has not been downloaded for the given target ID yet.

        Media whose formatted filename results in an already-existing file
        will be *ignored* and not re-downloaded.
        """
        # TODO Should this respect and download only allowed media? Or all?
        target_in = await self.client.get_input_entity(target_id)
        target = await self.client.get_entity(target_in)
        target_id = utils.get_peer_id(target)
        bar = tqdm.tqdm(unit='B',
                        desc='media',
                        unit_divisor=1000,
                        unit_scale=True,
                        bar_format=BAR_FORMAT,
                        total=0,
                        postfix={'chat': utils.get_display_name(target)})

        msg_cursor = dumper.conn.cursor()
        msg_cursor.execute(
            'SELECT ID, Date, FromID, MediaID FROM Message '
            'WHERE ContextID = ? AND MediaID IS NOT NULL', (target_id, ))

        msg_row = msg_cursor.fetchone()
        while msg_row:
            await self._download_media(media_id=msg_row[3],
                                       context_id=target_id,
                                       sender_id=msg_row[2],
                                       date=msg_row[1],
                                       bar=bar)
            msg_row = msg_cursor.fetchone()
Example #3
    def enqueue_entities(self, entities):
        """
        Enqueues the given iterable of entities to be dumped later by a
        different coroutine. These in turn might enqueue profile photos.
        """
        for entity in entities:
            eid = utils.get_peer_id(entity)
            self._displays[eid] = utils.get_display_name(entity)
            if isinstance(entity, types.User):
                if entity.deleted or entity.min:
                    continue  # Empty name would cause IntegrityError
            elif isinstance(entity, types.Channel):
                if entity.left:
                    continue  # Getting full info triggers ChannelPrivateError
            elif not isinstance(entity,
                                (types.Chat, types.InputPeerUser,
                                 types.InputPeerChat, types.InputPeerChannel)):
                # Drop UserEmpty, ChatEmpty, ChatForbidden and ChannelForbidden
                continue

            if eid in self._checked_entity_ids:
                continue
            else:
                self._checked_entity_ids.add(eid)
                if isinstance(entity, (types.User, types.InputPeerUser)):
                    self._user_queue.put_nowait(entity)
                else:
                    self._chat_queue.put_nowait(entity)
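
The users and chats queued here are drained by separate consumer coroutines, which start() (Example #13) launches with asyncio.ensure_future and later waits on via queue.join(). A minimal sketch of what such a consumer loop could look like; the dump_user call and its arguments are assumptions for illustration, not the project's actual dumper API:

async def _user_consumer_sketch(queue, dumper):
    # Hypothetical consumer for an asyncio.Queue of users: take one queued
    # user at a time, dump it, and mark the task done so that queue.join()
    # in start() can eventually return.
    while True:
        user = await queue.get()
        try:
            dumper.dump_user(user, photo_id=None)  # assumed dumper method
        finally:
            queue.task_done()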
Example #4
    def _dump_admin_log(self, events, target):
        """
        Helper method to iterate the events from a GetAdminLogRequest
        and dump them into the Dumper, mostly to avoid excessive nesting.

        Also enqueues any media to be downloaded later by a different coroutine.
        """
        for event in events:
            assert isinstance(event, types.ChannelAdminLogEvent)
            if isinstance(event.action,
                          types.ChannelAdminLogEventActionChangePhoto):
                media_id1 = self.dumper.dump_media(event.action.new_photo)
                media_id2 = self.dumper.dump_media(event.action.prev_photo)
                self.enqueue_photo(event.action.new_photo,
                                   media_id1,
                                   target,
                                   peer_id=event.user_id,
                                   date=event.date)
                self.enqueue_photo(event.action.prev_photo,
                                   media_id2,
                                   target,
                                   peer_id=event.user_id,
                                   date=event.date)
            else:
                media_id1 = None
                media_id2 = None
            self.dumper.dump_admin_log_event(event, utils.get_peer_id(target),
                                             media_id1, media_id2)
        return min(e.id for e in events)
Example #5
    def test_formatter_get_chat(self):
        """
        Ensures that the BaseFormatter is able to fetch the expected
        entities when using a date parameter.
        """
        chat = types.Chat(id=123,
                          title='Some title',
                          photo=types.ChatPhotoEmpty(),
                          participants_count=7,
                          date=datetime.now(),
                          version=1)
        dumper = Dumper(self.dumper_config)

        fmt = BaseFormatter(dumper.conn)
        for month in range(1, 13):
            dumper.dump_chat(chat,
                             None,
                             timestamp=int(
                                 datetime(year=2010, month=month,
                                          day=1).timestamp()))
        dumper.commit()
        cid = tl_utils.get_peer_id(chat)
        # Default should get the most recent version
        date = fmt.get_chat(cid).date_updated
        assert date == datetime(year=2010, month=12, day=1)

        # Expected behaviour is to get the previous available date
        target = datetime(year=2010, month=6, day=29)
        date = fmt.get_chat(cid, target).date_updated
        assert date == datetime(year=2010, month=6, day=1)

        # Expected behaviour is to get the next date if previous unavailable
        target = datetime(year=2009, month=12, day=1)
        date = fmt.get_chat(cid, target).date_updated
        assert date == datetime(year=2010, month=1, day=1)
Example #6
def add_read_action(entity, target, action):
    if is_read(entity, target):
        action()
    else:
        read_actions.add(
            MessageAction(chat_id=get_peer_id(entity),
                          message_id=target.id,
                          action=action))
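
Both this helper and the ninja handler in Example #10 share a module-level read_actions set, a MessageAction record and a scheduler, none of which are shown. A plausible minimal definition, with the field names inferred from how they are used here (the namedtuple itself is an assumption):

import sched
from collections import namedtuple

# Hypothetical shared state: pending actions waiting for their target
# message to be read, plus the scheduler ninja() uses to fire them.
MessageAction = namedtuple('MessageAction', ['chat_id', 'message_id', 'action'])
read_actions = set()
scheduler = sched.scheduler()  # a real bot would run this in the background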
Example #7
    def enqueue_photo(self, photo, photo_id, context, peer_id=None, date=None):
        if not photo_id:
            return
        if not isinstance(context, int):
            context = utils.get_peer_id(context)
        if peer_id is None:
            peer_id = context
        if date is None:
            date = getattr(photo, 'date', None) or datetime.datetime.now()
        self.enqueue_media(photo_id, context, peer_id, date)
Example #8
def find_fmt_dialog_padding(dialogs):
    """
    Find the correct amount of space padding
    to give dialogs when printing them.
    """
    no_username = NO_USERNAME[:-1]  # Account for the added '@' if username
    return (
        max(len(str(utils.get_peer_id(dialog.entity))) for dialog in dialogs),
        max(
            len(
                getattr(dialog.entity, 'username', no_username) or no_username)
            for dialog in dialogs) + 1)
Example #9
def fmt_dialog(dialog, id_pad=0, username_pad=0):
    """
    Space-fill a row with given padding values
    to ensure alignment when printing dialogs.
    """
    username = getattr(dialog.entity, 'username', None)
    username = '@' + username if username else NO_USERNAME
    return '{:<{id_pad}} | {:<{username_pad}} | {}'.format(
        utils.get_peer_id(dialog.entity),
        username,
        dialog.name,
        id_pad=id_pad,
        username_pad=username_pad)
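
These two helpers are meant to be used together: compute the padding once over the full dialog list, then format each row with it. A minimal usage sketch, assuming a connected Telethon client inside an async context:

async def print_dialogs(client):
    # Fetch every dialog, work out the column widths, then print aligned rows.
    dialogs = await client.get_dialogs(limit=None)
    id_pad, username_pad = find_fmt_dialog_padding(dialogs)
    for dialog in dialogs:
        print(fmt_dialog(dialog, id_pad=id_pad, username_pad=username_pad))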
Example #10
def ninja(event):
    this_id = get_peer_id(event.input_chat)
    triggered = []
    for action in read_actions:
        if action.chat_id == this_id and event.is_read(action.message_id):
            scheduler.enter(1, 1, action.action)
            triggered.append(action)

    for action in triggered:
        read_actions.remove(action)

    if triggered:
        raise events.StopPropagation
Example #11
def get_entity_cached(entity):
    """TelegramClient.get_entity but with a global cache"""
    # convert to hashable int if not already hashable
    key = entity
    if not isinstance(key, (int, str)):
        key = get_peer_id(key)

    # Fetch if we don't have it cached
    if key not in ENTITY_CACHE or ENTITY_CACHE[key].is_expired():
        logger.info('fetching entity for {}'.format(key))
        ENTITY_CACHE[key] = CachedEntity(client.get_entity(entity))

    return ENTITY_CACHE[key].entity
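
The function relies on a module-level ENTITY_CACHE dictionary and a CachedEntity wrapper with an is_expired() method, neither of which is part of the example. A minimal sketch of such a wrapper; the 24-hour time-to-live is an assumption chosen for illustration:

import time

ENTITY_CACHE = {}  # hypothetical module-level cache: key -> CachedEntity

class CachedEntity:
    TTL = 24 * 60 * 60  # assumed lifetime in seconds

    def __init__(self, entity):
        self.entity = entity
        self._fetched_at = time.time()

    def is_expired(self):
        # Stale once the TTL has elapsed since the entity was fetched.
        return time.time() - self._fetched_at > self.TTL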
Example #12
async def get_entities_iter(mode, in_list, client):
    """
    Get a generator of entities to act on given a mode ('blacklist',
    'whitelist') and an input from that mode. If whitelist, generator
    will be asynchronous.
    """
    # TODO change None to empty blacklist?
    mode = mode.lower()
    if mode == 'whitelist':
        assert client is not None
        async for ent in entities_from_str(client, in_list):
            yield ent
    if mode == 'blacklist':
        assert client is not None
        blacklist = entities_from_str(client, in_list)
        avoid = set()
        async for entity in blacklist:
            avoid.add(utils.get_peer_id(entity))
        # TODO Should this get_dialogs call be cached? How?
        for dialog in await client.get_dialogs(limit=None):
            if utils.get_peer_id(dialog.entity) not in avoid:
                yield dialog.entity
        return
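
Because both modes produce an async generator, callers consume the result with async for. A short usage sketch, assuming a connected client and whatever string format entities_from_str expects for in_list:

async def show_targets(client, in_list):
    # Print the peer ID of every entity that survives the blacklist.
    async for entity in get_entities_iter('blacklist', in_list, client):
        print(utils.get_peer_id(entity))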
Example #13
    async def start(self, target_id):
        """
        Starts the dump with the given target ID.
        """
        self._running = True
        self._incomplete_download = None
        target_in = await self.client.get_input_entity(target_id)
        target = await self.client.get_entity(target_in)
        target_id = utils.get_peer_id(target)

        found = self.dumper.get_message_count(target_id)
        chat_name = utils.get_display_name(target)
        msg_bar = tqdm.tqdm(unit=' messages',
                            desc=chat_name,
                            initial=found,
                            bar_format=BAR_FORMAT)
        ent_bar = tqdm.tqdm(unit=' entities',
                            desc='entities',
                            bar_format=BAR_FORMAT,
                            postfix={'chat': chat_name})
        med_bar = tqdm.tqdm(unit='B',
                            desc='media',
                            unit_divisor=1000,
                            unit_scale=True,
                            bar_format=BAR_FORMAT,
                            total=0,
                            postfix={'chat': chat_name})
        # Divisor is 1000 not 1024 since tqdm puts a K not a Ki

        asyncio.ensure_future(self._user_consumer(self._user_queue, ent_bar))
        asyncio.ensure_future(self._chat_consumer(self._chat_queue, ent_bar))
        asyncio.ensure_future(self._media_consumer(self._media_queue, med_bar))
        self.enqueue_entities(self.dumper.iter_resume_entities(target_id))
        for mid, sender_id, date in self.dumper.iter_resume_media(target_id):
            self.enqueue_media(mid, target_id, sender_id, date)

        try:
            self.enqueue_entities((target, ))
            ent_bar.total = len(self._checked_entity_ids)
            req = functions.messages.GetHistoryRequest(
                peer=target_in,
                offset_id=0,
                offset_date=None,
                add_offset=0,
                limit=self.dumper.chunk_size,
                max_id=0,
                min_id=0,
                hash=0)
            if isinstance(target_in,
                          (types.InputPeerChat, types.InputPeerChannel)):
                try:
                    __log__.info('Getting participants...')
                    participants = await self.client.get_participants(
                        target_in)
                    added, removed = self.dumper.dump_participants_delta(
                        target_id, ids=[x.id for x in participants])
                    __log__.info('Saved %d new members, %d left the chat.',
                                 len(added), len(removed))
                except ChatAdminRequiredError:
                    __log__.info('Getting participants aborted (not admin).')

            req.offset_id, req.offset_date, stop_at = self.dumper.get_resume(
                target_id)
            if req.offset_id:
                __log__.info('Resuming at %s (%s)', req.offset_date,
                             req.offset_id)

            # Check if we have access to the admin log
            # TODO Resume admin log?
            # Rather silly considering logs only last up to two days and
            # there isn't much information in them (due to their short life).
            if isinstance(target_in, types.InputPeerChannel):
                log_req = functions.channels.GetAdminLogRequest(target_in,
                                                                q='',
                                                                min_id=0,
                                                                max_id=0,
                                                                limit=1)
                try:
                    await self.client(log_req)
                    log_req.limit = 100
                except ChatAdminRequiredError:
                    log_req = None
            else:
                log_req = None

            chunks_left = self.dumper.max_chunks
            # This loop is for get history, although the admin log
            # is interlaced as well to dump both at the same time.
            while self._running:
                start = time.time()
                history = await self.client(req)
                # Queue found entities so they can be dumped later
                self.enqueue_entities(
                    itertools.chain(history.users, history.chats))
                ent_bar.total = len(self._checked_entity_ids)

                # Dump the messages from this batch
                self._dump_messages(history.messages, target)

                # Determine whether to continue dumping or we're done
                count = len(history.messages)
                msg_bar.total = getattr(history, 'count', count)
                msg_bar.update(count)
                if history.messages:
                    # We may reinsert some we already have (so found > total)
                    found = min(found + len(history.messages), msg_bar.total)
                    req.offset_id = min(m.id for m in history.messages)
                    req.offset_date = min(m.date for m in history.messages)

                # Receiving fewer messages than the limit means we have
                # reached the end, so we need to exit. Next time we'll
                # start from offset 0 again so we can check for new messages.
                #
                # We dump forward (message ID going towards 0), so as soon
                # as the minimum message ID (now in offset ID) is less than
                # the highest ID ("closest" bound we need to reach), stop.
                if count < req.limit or req.offset_id <= stop_at:
                    __log__.debug('Received less messages than limit, done.')
                    max_id = self.dumper.get_max_message_id(target_id)
                    self.dumper.save_resume(target_id, stop_at=max_id)
                    break

                # Keep track of the last target ID (smallest one),
                # so we can resume from here in case of interruption.
                self.dumper.save_resume(
                    target_id,
                    msg=req.offset_id,
                    msg_date=req.offset_date,
                    stop_at=stop_at  # We DO want to preserve stop_at.
                )
                self.dumper.commit()

                chunks_left -= 1  # 0 means infinite, will reach -1 and never 0
                if chunks_left == 0:
                    __log__.debug('Reached maximum amount of chunks, done.')
                    break

                # Interlace with the admin log request if any
                if log_req:
                    result = await self.client(log_req)
                    self.enqueue_entities(
                        itertools.chain(result.users, result.chats))
                    if result.events:
                        log_req.max_id = self._dump_admin_log(
                            result.events, target)
                    else:
                        log_req = None

                # We need to sleep for HISTORY_DELAY but we have already spent
                # some of it invoking (so subtract said delta from the delay).
                await asyncio.sleep(
                    max(HISTORY_DELAY - (time.time() - start), 0))

            # Message loop complete, wait for the queues to empty
            msg_bar.n = msg_bar.total
            msg_bar.close()
            self.dumper.commit()

            # This loop is specific to the admin log (to finish up)
            while log_req and self._running:
                start = time.time()
                result = await self.client(log_req)
                self.enqueue_entities(
                    itertools.chain(result.users, result.chats))
                if result.events:
                    log_req.max_id = self._dump_admin_log(
                        result.events, target)
                    await asyncio.sleep(
                        max(HISTORY_DELAY - (time.time() - start), 0))
                else:
                    log_req = None

            __log__.info(
                'Done. Retrieving full information about %s missing entities.',
                self._user_queue.qsize() + self._chat_queue.qsize())
            await self._user_queue.join()
            await self._chat_queue.join()
            await self._media_queue.join()
        finally:
            self._running = False
            ent_bar.n = ent_bar.total
            ent_bar.close()
            med_bar.n = med_bar.total
            med_bar.close()
            # If the download was interrupted and there are users left in the
            # queue we want to save them into the database for the next run.
            entities = []
            while not self._user_queue.empty():
                entities.append(self._user_queue.get_nowait())
            while not self._chat_queue.empty():
                entities.append(self._chat_queue.get_nowait())
            if entities:
                self.dumper.save_resume_entities(target_id, entities)

            # Do the same with the media queue
            media = []
            while not self._media_queue.empty():
                media.append(self._media_queue.get_nowait())
            self.dumper.save_resume_media(media)

            if entities or media:
                self.dumper.commit()

            # Delete partially-downloaded files
            if (self._incomplete_download is not None
                    and os.path.isfile(self._incomplete_download)):
                os.remove(self._incomplete_download)
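
A downloader like this is normally driven from a small asyncio entry point: dump the history first, then fetch any media that was recorded but not yet downloaded. A hedged sketch of that flow; how the downloader and dumper objects are constructed is not shown here and is assumed:

import asyncio

async def dump_target(downloader, dumper, target_id):
    # First pass dumps messages and entities; second pass downloads the
    # media rows that were recorded but never fetched.
    await downloader.start(target_id)
    await downloader.download_past_media(dumper, target_id)

def main(downloader, dumper, target_id):
    asyncio.get_event_loop().run_until_complete(
        dump_target(downloader, dumper, target_id))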
Example #14
def on_message(event):
    peer_id = get_peer_id(event.input_chat)
    if STATE[peer_id].run(event, peer_id) == State.RESET:
        del STATE[peer_id]
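
The handler assumes a module-level STATE mapping from chat ID to a per-chat state machine exposing run(), plus a State.RESET sentinel; none of these are defined in the example. One possible minimal shape, purely as an illustration of the expected interface:

import enum
from collections import defaultdict

class State(enum.Enum):
    CONTINUE = enum.auto()
    RESET = enum.auto()

class ChatState:
    # Hypothetical per-chat state machine; a real plugin would inspect the
    # event, advance its internal state and only reset when a flow finishes.
    def run(self, event, peer_id):
        return State.RESET

STATE = defaultdict(ChatState)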
Example #15
import logging
from importlib import import_module
from garry import TelegramClient
from garry.utils import get_peer_id

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('Kiritoborg@main')

client = TelegramClient('kirito',
                        6,
                        'eb06d4abfb49dc3eeb1aeb98ae0f581e',
                        update_workers=1,
                        spawn_read_thread=False)
client.start()
my_id = get_peer_id(client.get_me())

plugins = ('axe', 'points', 'nicknames', 'snippets', 'ninja', 'markdown')

for plugin in plugins:
    logger.info('loading plugins.{}...'.format(plugin))
    import_module('plugins.{}'.format(plugin))

client.idle()