Example #1
async def _internal_transfer_to_telegram(
        client: TelegramClient,
        response: BinaryIO,
        file_size,
        file_name,
        progress_callback: callable,
        max_connection=None) -> Tuple[TypeInputFile, int]:
    file_id = helpers.generate_random_long()
    # file_size = os.path.getsize(response.name)

    hash_md5 = hashlib.md5()
    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(
        file_id, file_size, max_connection=max_connection)
    buffer = bytearray()
    part_index = 0
    async for data in stream_file(response, chunk_size=part_size):
        part_index += 1
        if len(data) == 0:
            # An empty read means EOF; undo the count for this chunk.
            part_index -= 1
            break
        # if len(data) != part_size and part_index != part_count:
        #     dat = b'\0' * (part_size - len(data))
        #     data += dat
        if not is_large:
            hash_md5.update(data)
        if len(buffer) == 0:
            await uploader.upload(data)
            if part_index >= part_count:
                break
            else:
                continue
        new_len = len(buffer) + len(data)
        if new_len >= part_size:
            cutoff = part_size - len(buffer)
            buffer.extend(data[:cutoff])
            await uploader.upload(bytes(buffer))
            buffer.clear()
            buffer.extend(data[cutoff:])
        else:
            buffer.extend(data)

        if part_index >= part_count:
            break

    # The stream may end before the predicted part count; tell each
    # sender the actual total so the final part is flagged correctly.
    for u in uploader.senders:
        u.request.file_total_parts = part_index
        u.part_count = part_index
    part_count = part_index

    if len(buffer) > 0:
        await uploader.upload(bytes(buffer))
    await uploader.finish_upload()
    if is_large:
        return InputFileBig(file_id, part_count, file_name), file_size
    else:
        return InputFile(file_id, part_count, file_name,
                         hash_md5.hexdigest()), file_size
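
All of these examples share the same re-chunking pattern: incoming chunks of arbitrary size are collected in a bytearray and flushed in fixed part_size pieces, with the cutoff split carrying any remainder over into the next part. A minimal, self-contained sketch of just that pattern (rechunk is an illustrative name, not part of the snippets above):

def rechunk(chunks, part_size):
    """Yield fixed-size parts from an iterable of arbitrarily sized chunks.

    Every yielded part except possibly the last has exactly part_size bytes,
    mirroring the buffer/cutoff logic used in the examples.
    """
    buffer = bytearray()
    for data in chunks:
        if not buffer and len(data) == part_size:
            # Fast path: the chunk is already exactly one part.
            yield bytes(data)
            continue
        buffer.extend(data)
        while len(buffer) >= part_size:
            yield bytes(buffer[:part_size])
            del buffer[:part_size]
    if buffer:
        yield bytes(buffer)  # final, possibly short, part

For instance, list(rechunk([b"ab", b"cde", b"f"], 4)) yields [b"abcd", b"ef"].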
Example #2
async def _internal_transfer_to_telegram(
        client: MautrixTelegramClient,
        response: ClientResponse) -> tuple[TypeInputFile, int]:
    file_id = helpers.generate_random_long()
    file_size = response.content_length

    hash_md5 = hashlib.md5()
    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(
        file_id, file_size)
    buffer = bytearray()
    async for data in response.content:
        if not is_large:
            hash_md5.update(data)
        if len(buffer) == 0 and len(data) == part_size:
            await uploader.upload(data)
            continue
        new_len = len(buffer) + len(data)
        if new_len >= part_size:
            cutoff = part_size - len(buffer)
            buffer.extend(data[:cutoff])
            await uploader.upload(bytes(buffer))
            buffer.clear()
            buffer.extend(data[cutoff:])
        else:
            buffer.extend(data)
    if len(buffer) > 0:
        await uploader.upload(bytes(buffer))
    await uploader.finish_upload()
    if is_large:
        return InputFileBig(file_id, part_count, "upload"), file_size
    else:
        return InputFile(file_id, part_count, "upload",
                         hash_md5.hexdigest()), file_size
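
Because this variant takes an aiohttp ClientResponse directly, a caller only needs to open the request and hand the response over. A hypothetical usage sketch (upload_from_url is an illustrative name; it assumes aiohttp is installed, the client is already connected, and _internal_transfer_to_telegram is in scope):

import aiohttp

async def upload_from_url(client, url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # response provides the .content async iterator and the
            # .content_length that the function above relies on.
            return await _internal_transfer_to_telegram(client, response)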
Example #3
async def _internal_transfer_to_telegram(
        client: TelegramClient,
        response: BinaryIO,
        file_size,
        file_name,
        progress_callback: callable,
        max_connection=None) -> Tuple[TypeInputFile, int]:
    file_id = helpers.generate_random_long()
    # file_size = os.path.getsize(response.name)

    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(
        file_id, file_size, max_connection=max_connection)
    buffer = bytearray()
    for part_index in range(part_count):
        data = await response.read(part_size)
        len_data = len(data)
        if len_data == 0:
            # EOF: the stream is exhausted but parts are still expected,
            # so emulate a final one-byte piece (needed e.g. for zip files).
            data = b'\0'
        if len_data != part_size and part_index != part_count - 1:
            # Zero-pad short reads so every part except the last one
            # is exactly part_size bytes.
            data += b'\0' * (part_size - len_data)

        if len(buffer) == 0 and len(data) == part_size:
            await uploader.upload(data)
            continue
        new_len = len(buffer) + len(data)
        if new_len >= part_size:
            cutoff = part_size - len(buffer)
            buffer.extend(data[:cutoff])
            await uploader.upload(bytes(buffer))
            buffer.clear()
            buffer.extend(data[cutoff:])
        else:
            buffer.extend(data)
    if len(buffer) > 0:
        await uploader.upload(bytes(buffer))
    await uploader.finish_upload()
    # This variant always returns InputFileBig, so no MD5 hash is computed.
    return InputFileBig(file_id, part_count, file_name), file_size
Example #4
async def _internal_transfer_to_telegram(client: TelegramClient,
                                         response: Union[BinaryIO, BytesIO, BufferedReader],
                                         progress_callback: callable,
                                         name: str = None
                                         ) -> Tuple[TypeInputFile, int]:
    file_id = helpers.generate_random_long()

    if isinstance(response, BytesIO):
        file_size = len(response.getvalue())
    else:
        file_size = os.path.getsize(name or response.name)

    hash_md5 = hashlib.md5()
    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(file_id, file_size)
    buffer = bytearray()

    for data in stream_file(response):
        if progress_callback:
            r = progress_callback(response.tell(), file_size)

            if inspect.isawaitable(r):
                await r

        if not is_large:
            hash_md5.update(data)

        if len(buffer) == 0 and len(data) == part_size:
            await uploader.upload(data)
            continue

        new_len = len(buffer) + len(data)

        if new_len >= part_size:
            cutoff = part_size - len(buffer)
            buffer.extend(data[:cutoff])
            await uploader.upload(bytes(buffer))
            buffer.clear()
            buffer.extend(data[cutoff:])
        else:
            buffer.extend(data)

    if len(buffer) > 0:
        await uploader.upload(bytes(buffer))

    await uploader.finish_upload()

    if is_large:
        return InputFileBig(file_id, part_count, name or "upload"), file_size

    return InputFile(file_id, part_count, name or "upload", hash_md5.hexdigest()), file_size
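
Example #4 is the only variant that actually drives progress_callback, and the inspect.isawaitable check lets it accept either a plain function or a coroutine function. A small sketch of both callback shapes and the dispatch (names here are illustrative):

import inspect

def sync_progress(sent, total):
    print(f"{sent}/{total} bytes sent")

async def async_progress(sent, total):
    print(f"{sent}/{total} bytes sent")

async def report(callback, sent, total):
    # Mirrors the dispatch in Example #4: call first, await only if needed.
    r = callback(sent, total)
    if inspect.isawaitable(r):
        await r

Either sync_progress or async_progress could be passed as the callback.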
Example #5
    def upload_file(self,
                    file,
                    part_size_kb=None,
                    file_name=None,
                    use_cache=None,
                    progress_callback=None):
        """
        Uploads the specified file and returns a handle (an instance of
        InputFile or InputFileBig, as required) which can later be used
        before it expires (handles are usable for less than a day).

        Uploading a file will simply return a "handle" to the file stored
        remotely on the Telegram servers, which can be used later on. This
        will **not** send the file to your own chat or any chat at all.

        Args:
            file (`str` | `bytes` | `file`):
                The path of the file, byte array, or stream that will be sent.
                Note that if a byte array or a stream is given, a filename
                or its type won't be inferred, and it will be sent as an
                "unnamed application/octet-stream".

                Subsequent calls with the very same file will result in
                immediate uploads, unless ``.clear_file_cache()`` is called.

            part_size_kb (`int`, optional):
                Chunk size when uploading files. The larger the chunks,
                the fewer requests are made (up to a 512KB maximum).

            file_name (`str`, optional):
                The file name which will be used on the resulting InputFile.
                If not specified, the name will be taken from the ``file``
                and if this is not a ``str``, it will be ``"unnamed"``.

            use_cache (`type`, optional):
                The type of cache to use (currently either ``InputDocument``
                or ``InputPhoto``). If present and the file is small enough
                to need the MD5, it will be checked against the database,
                and if a match is found, the upload won't be made. Instead,
                an instance of type ``use_cache`` will be returned.

            progress_callback (`callable`, optional):
                A callback function accepting two parameters:
                ``(sent bytes, total)``.

        Returns:
            :tl:`InputFileBig` if the file size is larger than 10MB,
            ``InputSizedFile`` (subclass of :tl:`InputFile`) otherwise.
        """
        if isinstance(file, (InputFile, InputFileBig)):
            return file  # Already uploaded

        if isinstance(file, str):
            file_size = os.path.getsize(file)
        elif isinstance(file, bytes):
            file_size = len(file)
        else:
            file = file.read()
            file_size = len(file)

        # File will now either be a string or bytes
        if not part_size_kb:
            part_size_kb = utils.get_appropriated_part_size(file_size)

        if part_size_kb > 512:
            raise ValueError('The part size must be less than or equal to 512KB')

        part_size = int(part_size_kb * 1024)
        if part_size % 1024 != 0:
            raise ValueError(
                'The part size must be evenly divisible by 1024')

        # Set a default file name if None was specified
        file_id = helpers.generate_random_long()
        if not file_name:
            if isinstance(file, str):
                file_name = os.path.basename(file)
            else:
                file_name = str(file_id)

        # Determine whether the file is too big (over 10MB) or not
        # Telegram makes a distinction between smaller and larger files
        is_large = file_size > 10 * 1024 * 1024
        hash_md5 = hashlib.md5()
        if not is_large:
            # Calculate the MD5 hash before anything else.
            # As this needs to be done always for small files,
            # might as well do it before anything else and
            # check the cache.
            if isinstance(file, str):
                with open(file, 'rb') as stream:
                    file = stream.read()
            hash_md5.update(file)
            if use_cache:
                cached = self.session.get_file(
                    hash_md5.digest(), file_size, cls=use_cache
                )
                if cached:
                    return cached

        part_count = (file_size + part_size - 1) // part_size
        __log__.info('Uploading file of %d bytes in %d chunks of %d',
                     file_size, part_count, part_size)

        with open(file, 'rb') if isinstance(file, str) else BytesIO(file) as stream:
            threads_count = 2 + int((self._upload_threads_count - 2) * float(file_size) / (1024 * 1024 * 10))
            threads_count = min(threads_count, self._upload_threads_count)
            threads_count = min(part_count, threads_count)
            upload_thread = []
            q_request = Queue()
            # spawn threads
            for i in range(threads_count):
                thread_dl = self.ProcessUpload('thread {0}'.format(i), self, q_request)
                thread_dl.start()
                upload_thread.append(thread_dl)
            for part_index in range(0, part_count, threads_count):
                # Read the file in chunks of size part_size
                for part_thread_index in range(threads_count):
                    if part_index + part_thread_index >= part_count:
                        break
                    part = stream.read(part_size)
                    # The SavePartRequest is different depending on whether
                    # the file is too large or not (over or less than 10MB)
                    if is_large:
                        request = SaveBigFilePartRequest(file_id, part_index + part_thread_index, part_count, part)
                    else:
                        request = SaveFilePartRequest(file_id, part_index + part_thread_index, part)
                    q_request.put(request)
                # Busy-wait until one of the worker threads reports a result
                # for this batch of parts.
                job_completed = False
                while not job_completed:
                    for th in upload_thread:
                        if th and th.result is True:
                            job_completed = True
                            __log__.debug('Uploaded %d/%d', part_index + 1, part_count)
                            if progress_callback:
                                progress_callback(stream.tell(), file_size)
                        elif th and th.result is False:
                            raise RuntimeError('Failed to upload file part {}.'.format(part_index))
            q_request.join()
            for i in range(threads_count):
                q_request.put(None)
            for th in upload_thread:
                th.join()
        if is_large:
            return InputFileBig(file_id, part_count, file_name)
        else:
            return InputSizedFile(
                file_id, part_count, file_name, md5=hash_md5, size=file_size
            )
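
The threads_count arithmetic in Example #5 scales linearly from 2 threads for an empty file up to the configured maximum at 10MB, then clamps to both that maximum and the part count. A quick check of the formula (assuming a configured maximum of 8 threads; threads_for is an illustrative name):

def threads_for(file_size, max_threads=8, part_count=10**9):
    threads = 2 + int((max_threads - 2) * float(file_size) / (1024 * 1024 * 10))
    threads = min(threads, max_threads)
    return min(part_count, threads)

threads_for(0)                 # 2
threads_for(5 * 1024 * 1024)   # 5 (halfway between 2 and 8)
threads_for(10 * 1024 * 1024)  # 8
threads_for(1024 ** 3)         # 8 (clamped to the maximum)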
Example #6
    def upload_file(self,
                    file_path,
                    part_size_kb=None,
                    file_name=None,
                    progress_callback=None):
        """Uploads the specified file_path and returns a handle which can be later used

        :param file_path: The file path of the file that will be uploaded
        :param part_size_kb: The part size when uploading the file. None = Automatic
        :param file_name: The name of the uploaded file. None = Automatic
        :param progress_callback: A callback function which takes two parameters,
                                  uploaded size (in bytes) and total file size (in bytes)
                                  This is called every time a part is uploaded
        """
        file_size = path.getsize(file_path)
        if not part_size_kb:
            part_size_kb = get_appropiate_part_size(file_size)

        if part_size_kb > 512:
            raise ValueError('The part size must be less than or equal to 512KB')

        part_size = int(part_size_kb * 1024)
        if part_size % 1024 != 0:
            raise ValueError('The part size must be evenly divisible by 1024')

        # Determine whether the file is too big (over 10MB) or not
        # Telegram makes a distinction between smaller and larger files
        is_large = file_size > 10 * 1024 * 1024
        part_count = (file_size + part_size - 1) // part_size

        # Generate a random long to use as the file ID;
        # this is highly likely to be unique
        file_id = utils.generate_random_long()
        hash_md5 = md5()

        with open(file_path, 'rb') as file:
            for part_index in range(part_count):
                # Read the file in chunks of size part_size
                part = file.read(part_size)

                # The SavePartRequest is different depending on whether
                # the file is too large or not (over or less than 10MB)
                if is_large:
                    request = SaveBigFilePartRequest(file_id, part_index,
                                                     part_count, part)
                else:
                    request = SaveFilePartRequest(file_id, part_index, part)

                # Invoke the file upload and update the MD5 checksum
                result = self.invoke(request)
                if result:
                    if not is_large:
                        # No need to update the hash if it's a large file
                        hash_md5.update(part)

                    if progress_callback:
                        progress_callback(file.tell(), file_size)
                else:
                    raise ValueError(
                        'Could not upload file part #{}'.format(part_index))

        # Set a default file name if None was specified
        if not file_name:
            file_name = path.basename(file_path)

        # After the file has been uploaded, we can return a handle pointing to it
        if is_large:
            return InputFileBig(id=file_id, parts=part_count, name=file_name)
        else:
            return InputFile(id=file_id,
                             parts=part_count,
                             name=file_name,
                             md5_checksum=hash_md5.hexdigest())
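
Both of the last two examples derive part_count with the standard ceiling-division idiom, (file_size + part_size - 1) // part_size, so a trailing short chunk still gets its own part. A worked example at the 512KB maximum part size:

file_size = 1024 * 1024 + 1  # one byte over 1MB
part_size = 512 * 1024       # 512KB, the maximum allowed above

part_count = (file_size + part_size - 1) // part_size
assert part_count == 3       # two full parts plus a 1-byte final part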