Example #1
0
    def calculate_download_size(self,
                                dl_propics,
                                dl_photos,
                                dl_docs,
                                docs_max_size=None,
                                before_date=None,
                                after_date=None):
        """Estimates how many bytes a media download with the given
           settings would need, by querying the backup database"""
        with TLDatabase(self.backup_dir) as db:
            estimated = 0

            # TODO How does Telegram Desktop find out the profile photo size?
            if dl_propics:
                # No real size is stored for profile photos, so use an average
                estimated += AVERAGE_PROPIC_SIZE * db.count(
                    'users where photo not null')

            if dl_photos:
                photo_query = self.get_query(MessageMediaPhoto, before_date,
                                             after_date)
                # The last entry of .sizes is the largest available version
                estimated += sum(m.media.photo.sizes[-1].size
                                 for m in db.query_messages(photo_query))

            if dl_docs:
                doc_query = self.get_query(MessageMediaDocument, before_date,
                                           after_date)
                # Skip documents over the size cap, when one was given
                estimated += sum(m.media.document.size
                                 for m in db.query_messages(doc_query)
                                 if not docs_max_size
                                 or m.media.document.size <= docs_max_size)

            return estimated
Example #2
0
    def __init__(self,
                 client,
                 entity,
                 download_delay=1,
                 download_chunk_size=100):
        """
        :param client:              An initialized TelegramClient used to download the messages
        :param entity:              The entity (user, chat or channel) whose history will be backed up
        :param download_delay:      Seconds to wait after each downloaded chunk of messages
        :param download_chunk_size: How many messages to request per chunk
                                    (Telegram allows at most 100)
        """
        self.client = client
        self.entity = entity
        self.download_delay = download_delay
        self.download_chunk_size = download_chunk_size

        # Every entity gets its own backup directory, keyed by its ID
        self.backup_dir = path.join(Backuper.backups_dir, str(entity.id))
        self.media_handler = MediaHandler(self.backup_dir)

        # Opening (and immediately closing) the database ensures that
        # the directory structure it requires exists on disk
        TLDatabase(self.backup_dir).close()

        # Paths of the auxiliary files this backup reads and writes
        self.files = {
            'entity': path.join(self.backup_dir, 'entity.tlo'),
            'metadata': path.join(self.backup_dir, 'metadata.json')
        }
        # TODO Crashes if the other user got us blocked (AttributeError: 'NoneType' object has no attribute 'photo_big')

        # Whether messages are currently being downloaded
        self.backup_running = False

        # Callback fired whenever the metadata gets saved
        self.on_metadata_change = None

        # Persist the entity to disk, then load any stored metadata
        with open(self.files['entity'], 'wb') as entity_file:
            with BinaryWriter(entity_file) as writer:
                entity.on_send(writer)
        self.metadata = self.load_metadata()
Example #3
0
    def backup_media_thread(self, dl_propics, dl_photos, dl_docs,
                            docs_max_size=None, before_date=None, after_date=None,
                            progress_callback=None):
        """Backups the specified media contained in the given database file.

        :param dl_propics:        whether profile pictures should be downloaded
        :param dl_photos:         whether message photos should be downloaded
        :param dl_docs:           whether message documents should be downloaded
        :param docs_max_size:     if set, documents larger than this many bytes are skipped
        :param before_date:       only consider messages sent before this date
        :param after_date:        only consider messages sent after this date
        :param progress_callback: invoked as callback(current, total, etl)
                                  after every processed item
        """
        self.backup_running = True

        # Create a connection to the database
        db = TLDatabase(self.backup_dir)

        # Store how many bytes we have/how many bytes there are in total.
        # The date filters are forwarded in the estimator's declared order
        # (before_date, then after_date); they were previously swapped,
        # which made the estimate use inverted date bounds.
        current = 0
        total = self.calculate_download_size(dl_propics, dl_photos, dl_docs,
                                             docs_max_size, before_date, after_date)

        # Keep track from when we started to determine the estimated time left
        start = datetime.now()

        if dl_propics:
            # TODO Also query chats and channels
            for user in db.query_users('where photo not null'):
                if not self.backup_running:
                    return
                # Try downloading the photo
                output = self.media_handler.get_propic_path(user)
                try:
                    if not self.valid_file_exists(output):
                        self.client.download_profile_photo(
                            user.photo, add_extension=False, file_path=output)
                        sleep(self.download_delay)

                except RPCError as e:
                    print('Error downloading profile photo:', e)
                finally:
                    # Profile photos have no stored size, count the average
                    current += AVERAGE_PROPIC_SIZE
                    if progress_callback:
                        progress_callback(current, total, self.calculate_etl(current, total, start))

        if dl_photos:
            for msg in db.query_messages(self.get_query(MessageMediaPhoto, before_date, after_date)):
                if not self.backup_running:
                    return
                # Try downloading the photo
                output = self.media_handler.get_msg_media_path(msg)
                try:
                    if not self.valid_file_exists(output):
                        self.client.download_msg_media(
                            msg.media, add_extension=False, file_path=output)
                        sleep(self.download_delay)

                except RPCError as e:
                    print('Error downloading photo:', e)
                finally:
                    current += msg.media.photo.sizes[-1].size
                    if progress_callback:
                        progress_callback(current, total, self.calculate_etl(current, total, start))

        # TODO Add an internal callback to determine how the current document download is going,
        # and update our currently saved bytes count based on that
        if dl_docs:
            for msg in db.query_messages(self.get_query(MessageMediaDocument, before_date, after_date)):
                if not self.backup_running:
                    return

                if not docs_max_size or msg.media.document.size <= docs_max_size:
                    # Try downloading the document
                    output = self.media_handler.get_msg_media_path(msg)
                    try:
                        if not self.valid_file_exists(output):
                            self.client.download_msg_media(
                                msg.media, add_extension=False, file_path=output)
                            # Only throttle when we actually hit the network,
                            # matching the branches above (previously this
                            # slept even when the file was already on disk)
                            sleep(self.download_delay)

                    except RPCError as e:
                        print('Error downloading document:', e)
                    finally:
                        current += msg.media.document.size
                        if progress_callback:
                            progress_callback(current, total, self.calculate_etl(current, total, start))
        db.close()
Example #4
0
    def backup_messages_thread(self):
        """This method backups the messages and should be ran in a different thread

        Pulls the entity's history in chunks of self.download_chunk_size
        via GetHistoryRequest, storing every message (and the users/chats
        referenced by each result) into the backup database, until all
        messages are saved or self.backup_running is cleared externally.
        Progress is persisted through self.metadata ('saved_msgs',
        'total_msgs', 'resume_msg_id', 'etl') so the backup can resume.
        """
        self.backup_running = True

        # Create a connection to the database
        db = TLDatabase(self.backup_dir)

        # Determine whether we started making the backup from the very first message or not.
        # If this is the case:
        #   We won't need to come back to the first message again after we've finished downloading
        #   them all, since that first message will already be in backup.
        #
        # Otherwise, if we did not start from the first message:
        #   More messages were in the backup already, and after we backup those "left" ones,
        #   we must return to the first message and backup until where we started.
        started_at_0 = self.metadata['resume_msg_id'] == 0

        # Keep an internal downloaded count for it to be faster
        # (instead of querying the database all the time)
        self.metadata['saved_msgs'] = db.count('messages')

        # We also need to keep track of how many messages we've downloaded now
        # in order to calculate the estimated time left properly
        saved_msgs_now = 0

        # Make the backup
        try:
            # We need this to invoke GetHistoryRequest
            input_peer = get_input_peer(self.entity)

            # Keep track from when we started to determine the estimated time left
            start = datetime.now()

            # Enter the download-messages main loop
            while self.backup_running:
                # Invoke the GetHistoryRequest to get the next messages after those we have
                result = self.client.invoke(GetHistoryRequest(
                    peer=input_peer,
                    offset_id=self.metadata['resume_msg_id'],
                    limit=self.download_chunk_size,
                    offset_date=None,
                    add_offset=0,
                    max_id=0,
                    min_id=0
                ))
                # Some result types carry a total 'count'; when absent,
                # fall back to however many messages were returned
                self.metadata['total_msgs'] = getattr(result, 'count', len(result.messages))

                # First add users and chats, replacing any previous value
                for user in result.users:
                    db.add_object(user, replace=True)
                for chat in result.chats:
                    db.add_object(chat, replace=True)

                # Then add the messages to the backup
                for msg in result.messages:
                    if db.in_table(msg.id, 'messages'):
                        # If the message we retrieved was already saved, this means that we're
                        # done because we have the rest of the messages.
                        # Clear the list so we enter the next if, and break to early terminate
                        self.metadata['resume_msg_id'] = result.messages[-1].id
                        del result.messages[:]
                        break
                    else:
                        db.add_object(msg)
                        saved_msgs_now += 1
                        self.metadata['saved_msgs'] += 1
                        self.metadata['resume_msg_id'] = msg.id

                # Refresh the estimated-time-left string shown to the user
                self.metadata['etl'] = str(self.calculate_etl(
                    saved_msgs_now, self.metadata['total_msgs'],
                    start=start))

                # Always commit at the end to save changes
                db.commit()
                self.save_metadata()

                # The list can be empty because we've either used a too big offset
                # (in which case we have all the previous messages), or we've reached
                # a point where we have the upcoming messages (so there's no need to
                # download them again and we stopped)
                if not result.messages:
                    # We've downloaded all the messages since the last backup
                    if started_at_0:
                        # And since we started from the very first message, we have them all
                        print('Downloaded all {}'.format(self.metadata['total_msgs']))
                        break
                    else:
                        # We need to start from the first message (latest sent message)
                        # and backup again until we have them all
                        self.metadata['resume_msg_id'] = 0
                        started_at_0 = True

                # Always sleep a bit, or Telegram will get angry and tell us to chill
                sleep(self.download_delay)

            pass  # end while

        except KeyboardInterrupt:
            # NOTE(review): KeyboardInterrupt is normally only delivered to the
            # main thread — confirm this handler can fire when ran as a worker
            print('Operation cancelled, not downloading more messages!')
            # Also commit here, we don't want to lose any information!
            db.commit()
            self.save_metadata()

        finally:
            self.backup_running = False
Example #5
0
    def backup_media_thread(self,
                            dl_propics,
                            dl_photos,
                            dl_docs,
                            docs_max_size=None,
                            before_date=None,
                            after_date=None,
                            progress_callback=None):
        """Backups the specified media contained in the given database file.

        :param dl_propics:        whether profile pictures should be downloaded
        :param dl_photos:         whether message photos should be downloaded
        :param dl_docs:           whether message documents should be downloaded
        :param docs_max_size:     if set, documents larger than this many bytes are skipped
        :param before_date:       only consider messages sent before this date
        :param after_date:        only consider messages sent after this date
        :param progress_callback: invoked as callback(current, total, etl)
                                  after every processed item
        """
        self.backup_running = True

        # Create a connection to the database
        db = TLDatabase(self.backup_dir)

        # Store how many bytes we have/how many bytes there are in total.
        # The date filters are forwarded in the estimator's declared order
        # (before_date, then after_date); they were previously swapped,
        # which made the estimate use inverted date bounds.
        current = 0
        total = self.calculate_download_size(dl_propics, dl_photos, dl_docs,
                                             docs_max_size, before_date,
                                             after_date)

        # Keep track from when we started to determine the estimated time left
        start = datetime.now()

        if dl_propics:
            # TODO Also query chats and channels
            for user in db.query_users('where photo not null'):
                if not self.backup_running:
                    return
                # Try downloading the photo
                output = self.media_handler.get_propic_path(user)
                try:
                    if not self.valid_file_exists(output):
                        self.client.download_profile_photo(user.photo,
                                                           add_extension=False,
                                                           file_path=output)
                        sleep(self.download_delay)

                except RPCError as e:
                    print('Error downloading profile photo:', e)
                finally:
                    # Profile photos have no stored size, count the average
                    current += AVERAGE_PROPIC_SIZE
                    if progress_callback:
                        progress_callback(
                            current, total,
                            self.calculate_etl(current, total, start))

        if dl_photos:
            for msg in db.query_messages(
                    self.get_query(MessageMediaPhoto, before_date,
                                   after_date)):
                if not self.backup_running:
                    return
                # Try downloading the photo
                output = self.media_handler.get_msg_media_path(msg)
                try:
                    if not self.valid_file_exists(output):
                        self.client.download_msg_media(msg.media,
                                                       add_extension=False,
                                                       file_path=output)
                        sleep(self.download_delay)

                except RPCError as e:
                    print('Error downloading photo:', e)
                finally:
                    current += msg.media.photo.sizes[-1].size
                    if progress_callback:
                        progress_callback(
                            current, total,
                            self.calculate_etl(current, total, start))

        # TODO Add an internal callback to determine how the current document download is going,
        # and update our currently saved bytes count based on that
        if dl_docs:
            for msg in db.query_messages(
                    self.get_query(MessageMediaDocument, before_date,
                                   after_date)):
                if not self.backup_running:
                    return

                if not docs_max_size or msg.media.document.size <= docs_max_size:
                    # Try downloading the document
                    output = self.media_handler.get_msg_media_path(msg)
                    try:
                        if not self.valid_file_exists(output):
                            self.client.download_msg_media(msg.media,
                                                           add_extension=False,
                                                           file_path=output)
                            # Only throttle when we actually hit the network,
                            # matching the branches above (previously this
                            # slept even when the file was already on disk)
                            sleep(self.download_delay)

                    except RPCError as e:
                        print('Error downloading document:', e)
                    finally:
                        current += msg.media.document.size
                        if progress_callback:
                            progress_callback(
                                current, total,
                                self.calculate_etl(current, total, start))
        db.close()
Example #6
0
    def backup_messages_thread(self):
        """This method backups the messages and should be ran in a different thread

        Pulls the entity's history in chunks of self.download_chunk_size
        via GetHistoryRequest, storing every message (and the users/chats
        referenced by each result) into the backup database, until all
        messages are saved or self.backup_running is cleared externally.
        Progress is persisted through self.metadata ('saved_msgs',
        'total_msgs', 'resume_msg_id', 'etl') so the backup can resume.
        """
        self.backup_running = True

        # Create a connection to the database
        db = TLDatabase(self.backup_dir)

        # Determine whether we started making the backup from the very first message or not.
        # If this is the case:
        #   We won't need to come back to the first message again after we've finished downloading
        #   them all, since that first message will already be in backup.
        #
        # Otherwise, if we did not start from the first message:
        #   More messages were in the backup already, and after we backup those "left" ones,
        #   we must return to the first message and backup until where we started.
        started_at_0 = self.metadata['resume_msg_id'] == 0

        # Keep an internal downloaded count for it to be faster
        # (instead of querying the database all the time)
        self.metadata['saved_msgs'] = db.count('messages')

        # We also need to keep track of how many messages we've downloaded now
        # in order to calculate the estimated time left properly
        saved_msgs_now = 0

        # Make the backup
        try:
            # We need this to invoke GetHistoryRequest
            # NOTE(review): the entity is passed as the peer as-is here
            # (other code uses get_input_peer first) — confirm the client
            # accepts full entities as peers
            input_peer = self.entity

            # Keep track from when we started to determine the estimated time left
            start = datetime.now()

            # Enter the download-messages main loop
            # (make sure the client is connected before invoking requests)
            self.client.connect()
            while self.backup_running:
                # Invoke the GetHistoryRequest to get the next messages after those we have
                result = self.client.invoke(
                    GetHistoryRequest(peer=input_peer,
                                      offset_id=self.metadata['resume_msg_id'],
                                      limit=self.download_chunk_size,
                                      offset_date=None,
                                      add_offset=0,
                                      max_id=0,
                                      min_id=0))
                # For some strange reason, GetHistoryRequest might return upload.file.File
                # Ensure we retrieved Messages or MessagesSlice
                if not isinstance(result, Messages) and not isinstance(result, MessagesSlice) \
                        and not isinstance(result, ChannelMessages):
                    print('Invalid result type when downloading messages:',
                          type(result))
                    # Wait and retry the same offset on the next iteration
                    sleep(self.download_delay)
                    continue

                # Some result types carry a total 'count'; when absent,
                # fall back to however many messages were returned
                self.metadata['total_msgs'] = getattr(result, 'count',
                                                      len(result.messages))

                # First add users and chats, replacing any previous value
                for user in result.users:
                    db.add_object(user, replace=True)
                for chat in result.chats:
                    db.add_object(chat, replace=True)

                # Then add the messages to the backup
                for msg in result.messages:
                    if db.in_table(msg.id, 'messages'):
                        # If the message we retrieved was already saved, this means that we're
                        # done because we have the rest of the messages.
                        # Clear the list so we enter the next if, and break to early terminate
                        self.metadata['resume_msg_id'] = result.messages[-1].id
                        del result.messages[:]
                        break
                    else:
                        db.add_object(msg)
                        saved_msgs_now += 1
                        self.metadata['saved_msgs'] += 1
                        self.metadata['resume_msg_id'] = msg.id

                # Refresh the estimated-time-left string shown to the user
                self.metadata['etl'] = str(
                    self.calculate_etl(saved_msgs_now,
                                       self.metadata['total_msgs'],
                                       start=start))

                # Always commit at the end to save changes
                db.commit()
                self.save_metadata()

                # The list can be empty because we've either used a too big offset
                # (in which case we have all the previous messages), or we've reached
                # a point where we have the upcoming messages (so there's no need to
                # download them again and we stopped)
                if not result.messages:
                    # We've downloaded all the messages since the last backup
                    if started_at_0:
                        # And since we started from the very first message, we have them all
                        print('Downloaded all {}'.format(
                            self.metadata['total_msgs']))
                        break
                    else:
                        # We need to start from the first message (latest sent message)
                        # and backup again until we have them all
                        self.metadata['resume_msg_id'] = 0
                        started_at_0 = True

                # Always sleep a bit, or Telegram will get angry and tell us to chill
                sleep(self.download_delay)

            pass  # end while

        except KeyboardInterrupt:
            # NOTE(review): KeyboardInterrupt is normally only delivered to the
            # main thread — confirm this handler can fire when ran as a worker
            print('Operation cancelled, not downloading more messages!')
            # Also commit here, we don't want to lose any information!
            db.commit()
            self.save_metadata()

        finally:
            self.backup_running = False
Example #7
0
    def export_thread(self, callback):
        """The exporting a conversation method (should be ran in a different thread)

        Walks every message in the backup database in ascending id order,
        writing one HTML page per day via HTMLTLWriter, and copies the
        previously downloaded profile pictures and message media next to
        the output so the exported pages can reference them.

        :param callback: if given, invoked with a progress dict
                         ({'exported', 'total', 'etl'}) on each day change
                         and once more when the export finishes
        """

        with TLDatabase(self.backups_dir) as db:
            # Media paths inside the backup (source) vs. inside the export (output)
            db_media_handler = MediaHandler(self.backups_dir)

            # First copy the default media files
            self.copy_default_media()

            # Progress accounting shared with the callback
            progress = {
                'exported': 0,
                'total': db.count('messages'),
                'etl': 'Unknown'
            }

            # The first date will obviously be the first day
            # TODO This fails if there are 0 messages in the database, export should be disabled!
            previous_date = self.get_message_date(
                db.query_message('order by id asc'))

            # Also find the next day
            following_date = self.get_previous_and_next_day(db,
                                                            previous_date)[1]

            # Set the first writer (which will have the "previous" date, the first one)
            writer = HTMLTLWriter(previous_date,
                                  self.media_handler,
                                  following_date=following_date)

            # Keep track from when we started to determine the estimated time left
            start = datetime.now()

            # Export the profile photos, from users chats and channels
            # TODO This should also have a progress if we have a backup of thousands of files!
            for user in db.query_users():
                if user.photo:
                    source = db_media_handler.get_propic_path(user)
                    output = self.media_handler.get_propic_path(user)
                    if isfile(source):
                        copyfile(source, output)

            # Iterate over all the messages to export them in their respective days
            for msg in db.query_messages('order by id asc'):
                msg_date = self.get_message_date(msg)
                progress['exported'] += 1

                # As soon as we're in the next day, update the output the writer
                if msg_date != previous_date:
                    # Exit the previous writer to end the header
                    # (manual __exit__ because the writer's lifetime spans
                    # several loop iterations)
                    writer.__exit__(None, None, None)

                    # Update date values and create a new instance
                    previous_date, following_date =\
                        self.get_previous_and_next_day(db, msg_date)

                    writer = HTMLTLWriter(msg_date,
                                          self.media_handler,
                                          previous_date=previous_date,
                                          following_date=following_date)
                    # Call the callback (progress is only reported when the
                    # day rolls over, not on every message)
                    if callback:
                        # NOTE(review): argument order (start, exported, total)
                        # differs from the (current, total, start) order used
                        # by other calculate_etl callers — confirm it matches
                        # this class' signature
                        progress['etl'] = self.calculate_etl(
                            start, progress['exported'], progress['total'])
                        callback(progress)
                    else:
                        print(progress)

                writer.write_message(msg, db)
                # If the message has media, we need to copy it so it's accessible by the exported HTML
                if not isinstance(msg, MessageService) and msg.media:
                    source = db_media_handler.get_msg_media_path(msg)
                    output = self.media_handler.get_msg_media_path(msg)
                    # Source may be None if the media is unsupported (i.e. a webpage)
                    if source and isfile(source):
                        copyfile(source, output)

                previous_date = msg_date

            # Always exit at the end
            writer.__exit__(None, None, None)
            # Call the callback to notify we've finished
            if callback:
                progress['etl'] = timedelta(seconds=0)
                callback(progress)