def _dump_messages(self, messages, target):
    """
    Helper method to iterate the messages from a GetMessageHistoryRequest
    and dump them into the Dumper, mostly to avoid excessive nesting.

    Also enqueues any media to be downloaded later by a different coroutine.

    :param messages: iterable of message objects. Only ``types.Message``
        and ``types.MessageService`` instances are dumped; anything else
        is silently skipped.
    :param target: the entity the messages belong to. Its peer ID is used
        as the context ID of every dumped row.
    """
    # The context is the same for every message, so resolve it only once
    # instead of once (or twice) per message.
    context_id = utils.get_peer_id(target)

    for m in messages:
        if isinstance(m, types.Message):
            media_id = self.dumper.dump_media(m.media)
            # Only queue the download if the media was actually stored
            # and ``_check_media`` accepts it.
            if media_id and self._check_media(m.media):
                self.enqueue_media(media_id, context_id, m.from_id, m.date)

            self.dumper.dump_message(
                message=m,
                context_id=context_id,
                forward_id=self.dumper.dump_forward(m.fwd_from),
                media_id=media_id
            )
        elif isinstance(m, types.MessageService):
            # The only service action carrying media that is handled here
            # is a chat photo change.
            if isinstance(m.action, types.MessageActionChatEditPhoto):
                media_id = self.dumper.dump_media(m.action.photo)
                self.enqueue_photo(m.action.photo, media_id, target,
                                   peer_id=m.from_id, date=m.date)
            else:
                media_id = None

            self.dumper.dump_message_service(
                message=m,
                context_id=context_id,
                media_id=media_id
            )
async def download_past_media(self, dumper, target_id):
    """
    Downloads the past media that has already been dumped into the
    database but has not been downloaded for the given target ID yet.

    Media which formatted filename results in an already-existing file
    will be *ignored* and not re-downloaded again.

    :param dumper: object whose ``conn`` attribute is the database
        connection queried for the already-dumped messages.
    :param target_id: anything ``client.get_input_entity`` accepts; it is
        resolved to the entity's peer ID before querying.
    """
    # TODO Should this respect and download only allowed media? Or all?
    target_in = await self.client.get_input_entity(target_id)
    target = await self.client.get_entity(target_in)
    target_id = utils.get_peer_id(target)

    bar = tqdm.tqdm(unit='B', desc='media', unit_divisor=1000,
                    unit_scale=True, bar_format=BAR_FORMAT, total=0,
                    postfix={'chat': utils.get_display_name(target)})
    try:
        msg_cursor = dumper.conn.cursor()
        msg_cursor.execute(
            'SELECT ID, Date, FromID, MediaID FROM Message '
            'WHERE ContextID = ? AND MediaID IS NOT NULL',
            (target_id, )
        )

        # Rows are fetched one at a time so the whole result set is never
        # held in memory while the (slow) downloads are awaited.
        msg_row = msg_cursor.fetchone()
        while msg_row:
            await self._download_media(
                media_id=msg_row[3],
                context_id=target_id,
                sender_id=msg_row[2],
                date=msg_row[1],
                bar=bar
            )
            msg_row = msg_cursor.fetchone()
    finally:
        # Always release the progress bar, even if a download fails
        # or the coroutine is cancelled midway.
        bar.close()
def enqueue_entities(self, entities):
    """
    Queues every entity of the given iterable so that a different
    coroutine can dump it later. Dumping them may in turn enqueue
    profile photos.

    The display name of every entity is remembered in ``_displays``
    regardless of whether the entity ends up queued or not. An entity
    is queued at most once (tracked through ``_checked_entity_ids``).
    """
    for ent in entities:
        peer_id = utils.get_peer_id(ent)
        self._displays[peer_id] = utils.get_display_name(ent)

        if isinstance(ent, types.User):
            # Empty name would cause IntegrityError
            unusable = ent.deleted or ent.min
        elif isinstance(ent, types.Channel):
            # Getting full info triggers ChannelPrivateError
            unusable = ent.left
        else:
            # Drop UserEmpty, ChatEmpty, ChatForbidden and ChannelForbidden
            unusable = not isinstance(ent, (
                types.Chat, types.InputPeerUser,
                types.InputPeerChat, types.InputPeerChannel
            ))

        if unusable:
            continue
        if peer_id in self._checked_entity_ids:
            continue
        self._checked_entity_ids.add(peer_id)

        # Users and chats are consumed by different coroutines
        is_user = isinstance(ent, (types.User, types.InputPeerUser))
        queue = self._user_queue if is_user else self._chat_queue
        queue.put_nowait(ent)
def _dump_admin_log(self, events, target):
    """
    Helper method to iterate the events from a GetAdminLogRequest
    and dump them into the Dumper, mostly to avoid excessive nesting.

    Also enqueues any media to be downloaded later by a different coroutine.

    :param events: a non-empty, re-iterable collection (it is traversed
        twice) of ``types.ChannelAdminLogEvent``.
    :param target: the entity the admin log belongs to. Its peer ID is
        used as the context ID of every dumped event.
    :return: the smallest event ID found in ``events``.
    :raises ValueError: if ``events`` is empty (there is no minimum).
    """
    # The context is the same for every event, so resolve it only once.
    context_id = utils.get_peer_id(target)

    for event in events:
        assert isinstance(event, types.ChannelAdminLogEvent)
        if isinstance(event.action,
                      types.ChannelAdminLogEventActionChangePhoto):
            # A photo change carries two photos; both are stored and
            # queued for download.
            media_id1 = self.dumper.dump_media(event.action.new_photo)
            media_id2 = self.dumper.dump_media(event.action.prev_photo)
            self.enqueue_photo(event.action.new_photo, media_id1, target,
                               peer_id=event.user_id, date=event.date)
            self.enqueue_photo(event.action.prev_photo, media_id2, target,
                               peer_id=event.user_id, date=event.date)
        else:
            media_id1 = None
            media_id2 = None

        self.dumper.dump_admin_log_event(
            event, context_id, media_id1, media_id2
        )

    return min(e.id for e in events)
def test_formatter_get_chat(self):
    """
    Checks that BaseFormatter.get_chat returns the expected stored
    revision of a chat, both with and without a date parameter.
    """
    def first_of(year, month):
        return datetime(year=year, month=month, day=1)

    chat = types.Chat(
        id=123,
        title='Some title',
        photo=types.ChatPhotoEmpty(),
        participants_count=7,
        date=datetime.now(),
        version=1
    )
    dumper = Dumper(self.dumper_config)
    fmt = BaseFormatter(dumper.conn)

    # Store one revision of the chat per month of 2010
    for month in range(1, 13):
        stamp = int(first_of(2010, month).timestamp())
        dumper.dump_chat(chat, None, timestamp=stamp)
    dumper.commit()
    chat_id = tl_utils.get_peer_id(chat)

    # Default should get the most recent version
    latest = fmt.get_chat(chat_id).date_updated
    assert latest == first_of(2010, 12)

    # Expected behaviour is to get the previous available date
    when = datetime(year=2010, month=6, day=29)
    previous = fmt.get_chat(chat_id, when).date_updated
    assert previous == first_of(2010, 6)

    # Expected behaviour is to get the next date if previous unavailable
    when = first_of(2009, 12)
    following = fmt.get_chat(chat_id, when).date_updated
    assert following == first_of(2010, 1)
def add_read_action(entity, target, action):
    """
    Runs ``action`` immediately when ``is_read(entity, target)`` holds;
    otherwise registers it in ``read_actions`` keyed by the entity's
    peer ID and the target's message ID so it can be triggered later.
    """
    if not is_read(entity, target):
        pending = MessageAction(
            chat_id=get_peer_id(entity),
            message_id=target.id,
            action=action
        )
        read_actions.add(pending)
        return

    action()
def enqueue_photo(self, photo, photo_id, context, peer_id=None, date=None):
    """
    Queues a photo for download through ``enqueue_media``.

    Nothing happens when ``photo_id`` is falsy. ``context`` may be an
    integer ID or anything ``utils.get_peer_id`` accepts. A missing
    ``peer_id`` defaults to the context ID, and a missing ``date``
    defaults to the photo's own ``date`` or, failing that, to now.
    """
    if photo_id:
        if isinstance(context, int):
            context_id = context
        else:
            context_id = utils.get_peer_id(context)

        sender_id = context_id if peer_id is None else peer_id

        when = date
        if when is None:
            when = getattr(photo, 'date', None) or datetime.datetime.now()

        self.enqueue_media(photo_id, context_id, sender_id, when)
def find_fmt_dialog_padding(dialogs):
    """
    Find the correct amount of space padding
    to give dialogs when printing them.

    :param dialogs: re-iterable collection (it is traversed twice) of
        objects with an ``entity`` attribute.
    :return: a ``(id_pad, username_pad)`` tuple with the width of the
        longest peer ID and of the longest username column. An empty
        ``dialogs`` yields ``(0, len(NO_USERNAME))`` instead of raising.
    """
    no_username = NO_USERNAME[:-1]  # Account for the added '@' if username

    # ``default`` keeps max() from raising ValueError when the account
    # has no dialogs at all.
    id_pad = max(
        (len(str(utils.get_peer_id(dialog.entity))) for dialog in dialogs),
        default=0
    )
    username_pad = max(
        (len(getattr(dialog.entity, 'username', no_username) or no_username)
         for dialog in dialogs),
        default=len(no_username)
    ) + 1

    return id_pad, username_pad
def fmt_dialog(dialog, id_pad=0, username_pad=0):
    """
    Space-fill a row with given padding values
    to ensure alignment when printing dialogs.

    :param dialog: object with ``entity`` and ``name`` attributes.
    :param id_pad: minimum width of the (left-aligned) peer ID column.
    :param username_pad: minimum width of the (left-aligned) username
        column.
    :return: the formatted ``'<id> | <username> | <name>'`` row.
    """
    username = getattr(dialog.entity, 'username', None)
    # A single '@' is prepended to real usernames; the padding helper
    # budgets exactly one extra character for it, so any longer prefix
    # would break the column alignment.
    username = '@' + username if username else NO_USERNAME
    return '{:<{id_pad}} | {:<{username_pad}} | {}'.format(
        utils.get_peer_id(dialog.entity), username, dialog.name,
        id_pad=id_pad, username_pad=username_pad
    )
def ninja(event):
    """
    Schedules every pending read action that belongs to the event's chat
    and whose message the event reports as read, then forgets those
    actions. Raises ``events.StopPropagation`` if any action fired.
    """
    chat_id = get_peer_id(event.input_chat)

    fired = []
    for pending in read_actions:
        matches = (pending.chat_id == chat_id
                   and event.is_read(pending.message_id))
        if not matches:
            continue
        scheduler.enter(1, 1, pending.action)
        fired.append(pending)

    # Removal happens afterwards so the collection is not
    # modified while it is being iterated.
    for done in fired:
        read_actions.remove(done)

    if fired:
        raise events.StopPropagation
def get_entity_cached(entity):
    """
    Like the client's ``get_entity``, but backed by the global
    ``ENTITY_CACHE``. Entries are refetched when missing or expired.
    """
    # ints and strs are used as the cache key directly;
    # anything else is reduced to its peer ID first
    if isinstance(entity, (int, str)):
        cache_key = entity
    else:
        cache_key = get_peer_id(entity)

    stale = (cache_key not in ENTITY_CACHE
             or ENTITY_CACHE[cache_key].is_expired())
    if stale:
        logger.info('fetching entity for {}'.format(cache_key))
        ENTITY_CACHE[cache_key] = CachedEntity(client.get_entity(entity))

    return ENTITY_CACHE[cache_key].entity
async def get_entities_iter(mode, in_list, client):
    """
    Asynchronous generator over the entities to act on.

    ``mode`` is matched case-insensitively. With ``'whitelist'`` the
    entities resolved from ``in_list`` are yielded. With ``'blacklist'``
    the entity of every dialog is yielded, except for those resolved
    from ``in_list``. Any other mode yields nothing.
    """
    # TODO change None to empty blacklist?
    selected = mode.lower()

    if selected == 'whitelist':
        assert client is not None
        async for wanted in entities_from_str(client, in_list):
            yield wanted

    elif selected == 'blacklist':
        assert client is not None
        excluded = set()
        async for banned in entities_from_str(client, in_list):
            excluded.add(utils.get_peer_id(banned))

        # TODO Should this get_dialogs call be cached? How?
        dialogs = await client.get_dialogs(limit=None)
        for dialog in dialogs:
            if utils.get_peer_id(dialog.entity) in excluded:
                continue
            yield dialog.entity
async def start(self, target_id):
    """
    Starts the dump with the given target ID.

    The target is resolved, the user/chat/media consumer coroutines are
    scheduled, and the message history is requested chunk by chunk
    (interlaced with the admin log when it is accessible) until the
    history is exhausted, the resume bound is reached, the maximum
    number of chunks is hit, or ``_running`` becomes false.

    Whatever the outcome, the progress bars are closed, anything left
    in the queues is saved so the next run can resume, and any
    partially-downloaded file is removed.

    :param target_id: anything ``client.get_input_entity`` accepts.
    """
    self._running = True
    self._incomplete_download = None
    target_in = await self.client.get_input_entity(target_id)
    target = await self.client.get_entity(target_in)
    target_id = utils.get_peer_id(target)

    found = self.dumper.get_message_count(target_id)
    chat_name = utils.get_display_name(target)
    msg_bar = tqdm.tqdm(unit=' messages', desc=chat_name,
                        initial=found, bar_format=BAR_FORMAT)
    ent_bar = tqdm.tqdm(unit=' entities', desc='entities',
                        bar_format=BAR_FORMAT, postfix={'chat': chat_name})
    med_bar = tqdm.tqdm(unit='B', desc='media', unit_divisor=1000,
                        unit_scale=True, bar_format=BAR_FORMAT,
                        total=0, postfix={'chat': chat_name})
    # Divisor is 1000 not 1024 since tqdm puts a K not a Ki

    asyncio.ensure_future(self._user_consumer(self._user_queue, ent_bar))
    asyncio.ensure_future(self._chat_consumer(self._chat_queue, ent_bar))
    asyncio.ensure_future(self._media_consumer(self._media_queue, med_bar))

    # Re-queue whatever a previous, interrupted run left pending
    self.enqueue_entities(self.dumper.iter_resume_entities(target_id))
    for mid, sender_id, date in self.dumper.iter_resume_media(target_id):
        self.enqueue_media(mid, target_id, sender_id, date)

    try:
        self.enqueue_entities((target, ))
        ent_bar.total = len(self._checked_entity_ids)
        req = functions.messages.GetHistoryRequest(
            peer=target_in,
            offset_id=0,
            offset_date=None,
            add_offset=0,
            limit=self.dumper.chunk_size,
            max_id=0,
            min_id=0,
            hash=0
        )
        if isinstance(target_in,
                      (types.InputPeerChat, types.InputPeerChannel)):
            try:
                __log__.info('Getting participants...')
                participants = await self.client.get_participants(
                    target_in
                )
                added, removed = self.dumper.dump_participants_delta(
                    target_id, ids=[x.id for x in participants]
                )
                __log__.info('Saved %d new members, %d left the chat.',
                             len(added), len(removed))
            except ChatAdminRequiredError:
                __log__.info('Getting participants aborted (not admin).')

        req.offset_id, req.offset_date, stop_at = self.dumper.get_resume(
            target_id
        )
        if req.offset_id:
            __log__.info('Resuming at %s (%s)',
                         req.offset_date, req.offset_id)

        # Check if we have access to the admin log
        # TODO Resume admin log?
        # Rather silly considering logs only last up to two days and
        # there isn't much information in them (due to their short life).
        if isinstance(target_in, types.InputPeerChannel):
            log_req = functions.channels.GetAdminLogRequest(
                target_in, q='', min_id=0, max_id=0, limit=1
            )
            try:
                # Probe with a single event; only if that works
                # is the real chunk size used.
                await self.client(log_req)
                log_req.limit = 100
            except ChatAdminRequiredError:
                log_req = None
        else:
            log_req = None

        chunks_left = self.dumper.max_chunks
        # This loop is for get history, although the admin log
        # is interlaced as well to dump both at the same time.
        while self._running:
            start = time.time()
            history = await self.client(req)
            # Queue found entities so they can be dumped later
            self.enqueue_entities(
                itertools.chain(history.users, history.chats)
            )
            ent_bar.total = len(self._checked_entity_ids)

            # Dump the messages from this batch
            self._dump_messages(history.messages, target)

            # Determine whether to continue dumping or we're done
            count = len(history.messages)
            msg_bar.total = getattr(history, 'count', count)
            msg_bar.update(count)
            if history.messages:
                # We may reinsert some we already have (so found > total)
                found = min(found + len(history.messages), msg_bar.total)
                req.offset_id = min(m.id for m in history.messages)
                req.offset_date = min(m.date for m in history.messages)

            # Receiving less messages than the limit means we have
            # reached the end, so we need to exit. Next time we'll
            # start from offset 0 again so we can check for new messages.
            #
            # We dump forward (message ID going towards 0), so as soon
            # as the minimum message ID (now in offset ID) is less than
            # the highest ID ("closest" bound we need to reach), stop.
            if count < req.limit or req.offset_id <= stop_at:
                __log__.debug('Received less messages than limit, done.')
                max_id = self.dumper.get_max_message_id(target_id)
                self.dumper.save_resume(target_id, stop_at=max_id)
                break

            # Keep track of the last target ID (smallest one),
            # so we can resume from here in case of interruption.
            self.dumper.save_resume(
                target_id, msg=req.offset_id, msg_date=req.offset_date,
                stop_at=stop_at  # We DO want to preserve stop_at.
            )
            self.dumper.commit()

            chunks_left -= 1  # 0 means infinite, will reach -1 and never 0
            if chunks_left == 0:
                __log__.debug('Reached maximum amount of chunks, done.')
                break

            # Interlace with the admin log request if any
            if log_req:
                result = await self.client(log_req)
                self.enqueue_entities(
                    itertools.chain(result.users, result.chats)
                )
                if result.events:
                    log_req.max_id = self._dump_admin_log(
                        result.events, target
                    )
                else:
                    log_req = None

            # We need to sleep for HISTORY_DELAY but we have already spent
            # some of it invoking (so subtract said delta from the delay).
            await asyncio.sleep(
                max(HISTORY_DELAY - (time.time() - start), 0)
            )

        # Message loop complete, wait for the queues to empty
        msg_bar.n = msg_bar.total
        msg_bar.close()
        self.dumper.commit()

        # This loop is specific to the admin log (to finish up)
        while log_req and self._running:
            start = time.time()
            result = await self.client(log_req)
            self.enqueue_entities(
                itertools.chain(result.users, result.chats)
            )
            if result.events:
                log_req.max_id = self._dump_admin_log(
                    result.events, target
                )
                await asyncio.sleep(
                    max(HISTORY_DELAY - (time.time() - start), 0)
                )
            else:
                log_req = None

        __log__.info(
            'Done. Retrieving full information about %s missing entities.',
            self._user_queue.qsize() + self._chat_queue.qsize()
        )
        await self._user_queue.join()
        await self._chat_queue.join()
        await self._media_queue.join()
    finally:
        self._running = False
        # On the success path the message bar is already closed; closing
        # it here as well makes sure it is also released on failure,
        # while leaving its count at the progress actually made.
        msg_bar.close()
        ent_bar.n = ent_bar.total
        ent_bar.close()
        med_bar.n = med_bar.total
        med_bar.close()

        # If the download was interrupted and there are users left in the
        # queue we want to save them into the database for the next run.
        entities = []
        while not self._user_queue.empty():
            entities.append(self._user_queue.get_nowait())
        while not self._chat_queue.empty():
            entities.append(self._chat_queue.get_nowait())
        if entities:
            self.dumper.save_resume_entities(target_id, entities)

        # Do the same with the media queue
        media = []
        while not self._media_queue.empty():
            media.append(self._media_queue.get_nowait())
        self.dumper.save_resume_media(media)

        if entities or media:
            self.dumper.commit()

        # Delete partially-downloaded files
        if (self._incomplete_download is not None
                and os.path.isfile(self._incomplete_download)):
            os.remove(self._incomplete_download)
def on_message(event):
    """
    Feeds the event to the state stored for its chat and drops
    that state when running it reports ``State.RESET``.
    """
    chat_key = get_peer_id(event.input_chat)
    outcome = STATE[chat_key].run(event, chat_key)
    if outcome == State.RESET:
        del STATE[chat_key]
# Entry-point script: sets up logging, starts the Telegram client,
# loads every plugin module and then idles.
import logging
from importlib import import_module
from garry import TelegramClient
from garry.utils import get_peer_id

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('Kiritoborg@main')

# NOTE(review): the second and third arguments are presumably the API id
# and API hash, hard-coded in source — confirm, and consider loading them
# from configuration or the environment instead.
client = TelegramClient('kirito', 6, 'eb06d4abfb49dc3eeb1aeb98ae0f581e',
                        update_workers=1, spawn_read_thread=False)
client.start()
# Peer ID of the logged-in account.
my_id = get_peer_id(client.get_me())

# Plugins are imported only after ``client`` and ``my_id`` exist.
# NOTE(review): presumably the plugin modules import those names from
# this module, which would make the ordering significant — confirm.
plugins = ('axe', 'points', 'nicknames', 'snippets', 'ninja', 'markdown')
for plugin in plugins:
    logger.info('loading plugins.{}...'.format(plugin))
    import_module('plugins.{}'.format(plugin))

client.idle()