async def _internal_transfer_to_telegram(
        client: TelegramClient,
        response: BinaryIO,
        file_size,
        file_name,
        progress_callback: callable,
        max_connection=None) -> Tuple[TypeInputFile, int]:
    """Stream ``response`` to Telegram and return a handle for the upload.

    Chunks of ``part_size`` bytes are pulled from ``stream_file`` and handed
    to a ``ParallelTransferrer`` one at a time, exactly as they arrive.
    Streaming stops at the first empty chunk or as soon as ``part_count``
    chunks have been sent, whichever happens first. Afterwards the part count
    stored on every sender is overwritten with the number of chunks that were
    actually uploaded, so a stream shorter than announced still finishes
    with a consistent part total.

    Args:
        client: Client the ``ParallelTransferrer`` is created for.
        response: Source stream, passed to ``stream_file``.
        file_size: Size in bytes announced to ``init_upload``; returned
            unchanged as the second tuple element.
        file_name: Name stored in the returned input file.
        progress_callback: Accepted for interface compatibility only; this
            implementation never calls it.
        max_connection: Forwarded to ``init_upload``.

    Returns:
        ``(input_file, file_size)``. ``input_file`` is an ``InputFileBig``
        when ``init_upload`` reports a large file, otherwise an ``InputFile``
        carrying the MD5 hex digest of the uploaded bytes.
    """
    file_id = helpers.generate_random_long()

    hash_md5 = hashlib.md5()
    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(
        file_id, file_size, max_connection=max_connection)

    uploaded_parts = 0
    async for data in stream_file(response, chunk_size=part_size):
        if not data:
            # An empty chunk marks the end of the stream.
            break

        # Only small files need a checksum (it goes into InputFile).
        if not is_large:
            hash_md5.update(data)

        await uploader.upload(data)
        uploaded_parts += 1

        # Never send more parts than were announced to init_upload.
        if uploaded_parts >= part_count:
            break

    # The stream may have ended early; make every sender agree on the
    # number of parts that really went out before finishing.
    for sender in uploader.senders:
        sender.request.file_total_parts = uploaded_parts
        sender.part_count = uploaded_parts
    part_count = uploaded_parts

    await uploader.finish_upload()

    if is_large:
        return InputFileBig(file_id, part_count, file_name), file_size
    return InputFile(file_id, part_count, file_name, hash_md5.hexdigest()), file_size
async def _internal_transfer_to_telegram(
    client: MautrixTelegramClient, response: ClientResponse
) -> tuple[TypeInputFile, int]:
    """Pipe the body of an HTTP response into a Telegram file upload.

    The body is read in chunks of at most ``part_size`` bytes and regrouped
    so that every part handed to the ``ParallelTransferrer`` is exactly
    ``part_size`` bytes long, except possibly the final one.

    Args:
        client: Client the ``ParallelTransferrer`` is created for.
        response: Response whose ``content`` stream is uploaded and whose
            ``content_length`` is announced as the file size.

    Returns:
        ``(input_file, file_size)``. ``input_file`` is an ``InputFileBig``
        when ``init_upload`` reports a large file, otherwise an ``InputFile``
        carrying the MD5 hex digest of the body. The file name is always
        ``"upload"``.
    """
    file_id = helpers.generate_random_long()
    # NOTE(review): content_length is presumably None when the server sends
    # no Content-Length header; it is passed on unchecked -- confirm that
    # init_upload copes with that.
    file_size = response.content_length

    hash_md5 = hashlib.md5()
    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(file_id, file_size)

    buffer = bytearray()
    # Iterating the stream directly yields *lines*, which is meaningless for
    # binary data and fails on long newline-free runs. Size-bounded chunks
    # also guarantee the buffer below never holds a full part between
    # iterations.
    async for data in response.content.iter_chunked(part_size):
        # Only small files need a checksum (it goes into InputFile).
        if not is_large:
            hash_md5.update(data)

        # Fast path: an aligned, full-size chunk needs no copying.
        if not buffer and len(data) == part_size:
            await uploader.upload(data)
            continue

        buffer.extend(data)
        # buffer was < part_size and data is <= part_size, so at most one
        # full part can be available here.
        if len(buffer) >= part_size:
            await uploader.upload(bytes(buffer[:part_size]))
            del buffer[:part_size]

    # Whatever is left is the final, possibly short, part.
    if buffer:
        await uploader.upload(bytes(buffer))
    await uploader.finish_upload()

    if is_large:
        return InputFileBig(file_id, part_count, "upload"), file_size
    return InputFile(file_id, part_count, "upload", hash_md5.hexdigest()), file_size
async def _internal_transfer_to_telegram(
        client: TelegramClient,
        response: BinaryIO,
        file_size,
        file_name,
        progress_callback: callable,
        max_connection=None) -> Tuple[TypeInputFile, int]:
    """Upload exactly ``part_count`` parts read from ``response``.

    One ``await response.read(part_size)`` is issued per announced part.
    When the stream delivers less than announced, the missing bytes are
    replaced by zero bytes: every part except the last is padded to exactly
    ``part_size`` bytes, and an empty final read is sent as a single zero
    byte.

    Args:
        client: Client the ``ParallelTransferrer`` is created for.
        response: Object with an awaitable ``read(n)`` that returns at most
            ``n`` bytes.
        file_size: Size in bytes announced to ``init_upload``; returned
            unchanged as the second tuple element.
        file_name: Name stored in the returned input file.
        progress_callback: Accepted for interface compatibility only; this
            implementation never calls it.
        max_connection: Forwarded to ``init_upload``.

    Returns:
        ``(InputFileBig, file_size)``. The big-file handle is returned
        regardless of what ``init_upload`` reports, so no MD5 is computed.
    """
    file_id = helpers.generate_random_long()
    uploader = ParallelTransferrer(client)
    part_size, part_count, _ = await uploader.init_upload(
        file_id, file_size, max_connection=max_connection)

    last_index = part_count - 1
    for part_index in range(part_count):
        data = await response.read(part_size)

        if not data:
            # The stream ended before all announced parts were read;
            # stand in a zero byte for the missing piece.
            data = b'\0'

        # Every part but the last must be exactly part_size bytes. The
        # padding is computed from the *current* length so that an empty
        # read (already replaced by one zero byte above) is not padded to
        # part_size + 1 bytes.
        if part_index != last_index and len(data) < part_size:
            data += b'\0' * (part_size - len(data))

        await uploader.upload(data)

    await uploader.finish_upload()
    return InputFileBig(file_id, part_count, file_name), file_size
async def _internal_transfer_to_telegram(
        client: TelegramClient,
        response: Union[BinaryIO, BytesIO, BufferedReader],
        progress_callback: callable,
        name: str = None) -> Tuple[TypeInputFile, int]:
    """Upload a local stream to Telegram through a ``ParallelTransferrer``.

    The size is taken from the in-memory value for ``BytesIO`` objects and
    from the file system (``name`` if given, else ``response.name``) for
    anything else. Chunks produced by ``stream_file`` are regrouped into
    parts of ``part_size`` bytes before being uploaded.

    Args:
        client: Client the ``ParallelTransferrer`` is created for.
        response: Stream to upload.
        progress_callback: Optional callable invoked once per chunk with
            ``(response.tell(), file_size)``; an awaitable result is awaited.
        name: Path used for the size lookup of non-``BytesIO`` streams and
            name stored in the returned handle (``"upload"`` when falsy).

    Returns:
        ``(input_file, file_size)``. ``input_file`` is an ``InputFileBig``
        when ``init_upload`` reports a large file, otherwise an ``InputFile``
        carrying the MD5 hex digest of the streamed bytes.
    """
    file_id = helpers.generate_random_long()
    file_size = (
        len(response.getvalue())
        if isinstance(response, BytesIO)
        else os.path.getsize(name or response.name)
    )

    digest = hashlib.md5()
    uploader = ParallelTransferrer(client)
    part_size, part_count, is_large = await uploader.init_upload(file_id, file_size)

    pending = bytearray()
    for chunk in stream_file(response):
        if progress_callback:
            outcome = progress_callback(response.tell(), file_size)
            if inspect.isawaitable(outcome):
                await outcome

        # The checksum is only needed for the small-file handle.
        if not is_large:
            digest.update(chunk)

        # An aligned, full-size chunk can be sent without copying.
        if not pending and len(chunk) == part_size:
            await uploader.upload(chunk)
            continue

        # Bytes still missing to complete the part being assembled.
        room = part_size - len(pending)
        if len(chunk) < room:
            pending.extend(chunk)
        else:
            pending.extend(chunk[:room])
            await uploader.upload(bytes(pending))
            pending.clear()
            pending.extend(chunk[room:])

    # Flush the trailing partial part, if any.
    if pending:
        await uploader.upload(bytes(pending))
    await uploader.finish_upload()

    if not is_large:
        return InputFile(file_id, part_count, name or "upload", digest.hexdigest()), file_size
    return InputFileBig(file_id, part_count, name or "upload"), file_size
def upload_file(self, file, part_size_kb=None, file_name=None, use_cache=None, progress_callback=None):
    """
    Uploads the specified file and returns a handle (an instance of
    InputFile or InputFileBig, as required) which can be later used
    before it expires (they are usable during less than a day).

    Uploading a file will simply return a "handle" to the file stored
    remotely in the Telegram servers, which can be used later on. This
    will **not** upload the file to your own chat or any chat at all.

    The parts are pushed onto a queue that is consumed by a pool of
    ``self.ProcessUpload`` worker threads started by this method.

    Args:
        file (`str` | `bytes` | `file`):
            The path of the file, byte array, or stream that will be sent.
            A stream is read completely into memory before uploading.
            An ``InputFile`` or ``InputFileBig`` is returned unchanged.

        part_size_kb (`int`, optional):
            Chunk size when uploading files. The larger, the less
            requests will be made (up to 512KB maximum). When omitted it
            is chosen by ``utils.get_appropriated_part_size``.

        file_name (`str`, optional):
            The file name which will be used on the resulting InputFile.
            If not specified, the base name of ``file`` is used when it
            is a path; otherwise the generated file id, as a string.

        use_cache (`type`, optional):
            The type of cache to use (currently either ``InputDocument``
            or ``InputPhoto``). If present and the file is small enough
            to need the MD5, it will be checked against the database,
            and if a match is found, the upload won't be made. Instead,
            an instance of type ``use_cache`` will be returned.

        progress_callback (`callable`, optional):
            A callback function accepting two parameters:
            ``(sent bytes, total)``.

    Returns:
        :tl:`InputFileBig` if the file size is larger than 10MB,
        ``InputSizedFile`` (subclass of :tl:`InputFile`) otherwise.

    Raises:
        ValueError: If the part size exceeds 512KB or is not a multiple
            of 1024 bytes.
        RuntimeError: If a worker thread reports a failed part.
    """
    if isinstance(file, (InputFile, InputFileBig)):
        return file  # Already uploaded

    if isinstance(file, str):
        file_size = os.path.getsize(file)
    elif isinstance(file, bytes):
        file_size = len(file)
    else:
        # Streams are drained into memory so their size is known.
        file = file.read()
        file_size = len(file)

    # File will now either be a string or bytes
    if not part_size_kb:
        part_size_kb = utils.get_appropriated_part_size(file_size)

    if part_size_kb > 512:
        raise ValueError('The part size must be less or equal to 512KB')

    part_size = int(part_size_kb * 1024)
    if part_size % 1024 != 0:
        raise ValueError(
            'The part size must be evenly divisible by 1024')

    # Set a default file name if None was specified
    file_id = helpers.generate_random_long()
    if not file_name:
        if isinstance(file, str):
            file_name = os.path.basename(file)
        else:
            file_name = str(file_id)

    # Determine whether the file is too big (over 10MB) or not
    # Telegram does make a distinction between smaller or larger files
    is_large = file_size > 10 * 1024 * 1024
    hash_md5 = hashlib.md5()
    if not is_large:
        # Calculate the MD5 hash before anything else.
        # As this needs to be done always for small files,
        # might as well do it before anything else and
        # check the cache.
        if isinstance(file, str):
            # From here on ``file`` holds the contents, not the path, so
            # the upload below reads small files from memory.
            with open(file, 'rb') as stream:
                file = stream.read()
        hash_md5.update(file)
        if use_cache:
            cached = self.session.get_file(
                hash_md5.digest(), file_size, cls=use_cache
            )
            if cached:
                return cached

    # Ceiling division: the last part may be shorter than part_size.
    part_count = (file_size + part_size - 1) // part_size
    __log__.info('Uploading file of %d bytes in %d chunks of %d',
                 file_size, part_count, part_size)

    # ``file`` is still a path only for large files given by path;
    # everything else is bytes at this point.
    with open(file, 'rb') if isinstance(file, str) else BytesIO(file) as stream:
        # Scale the number of workers with the file size: 2 for tiny
        # files, growing linearly per 10MB, capped by the configured
        # maximum and by the number of parts.
        threads_count = 2 + int((self._upload_threads_count - 2) * float(file_size) / (1024 * 1024 * 10))
        threads_count = min(threads_count, self._upload_threads_count)
        threads_count = min(part_count, threads_count)
        # NOTE(review): an empty file gives part_count == 0 and therefore
        # threads_count == 0, which makes the range() below raise
        # ValueError (zero step) -- confirm whether empty input is expected.
        upload_thread = []
        q_request = Queue()
        # spawn threads
        for i in range(threads_count):
            thread_dl = self.ProcessUpload('thread {0}'.format(i), self, q_request)
            thread_dl.start()
            upload_thread.append(thread_dl)
        # Parts are queued in batches of ``threads_count``.
        for part_index in range(0, part_count, threads_count):
            # Read the file by in chunks of size part_size
            for part_thread_index in range(threads_count):
                if part_index + part_thread_index >= part_count:
                    break
                part = stream.read(part_size)
                # The SavePartRequest is different depending on whether
                # the file is too large or not (over or less than 10MB)
                if is_large:
                    request = SaveBigFilePartRequest(file_id, part_index + part_thread_index, part_count, part)
                else:
                    request = SaveFilePartRequest(file_id, part_index + part_thread_index, part)
                q_request.put(request)
            # q_request.join()
            # Spin until at least one worker reports success for this
            # batch; a worker reporting False aborts the upload.
            # NOTE(review): this is a busy-wait with no sleep, and it
            # relies on how ProcessUpload maintains ``result`` (not
            # visible here) -- verify against that class.
            job_completed = False
            while not job_completed:
                for th in upload_thread:
                    if th:
                        if th.result is True:
                            job_completed = True
                            __log__.debug('Uploaded %d/%d', part_index + 1, part_count)
                            if progress_callback:
                                progress_callback(stream.tell(), file_size)
                        elif th.result is False:
                            raise RuntimeError('Failed to upload file part {}.'.format(part_index))
        # Wait for every queued part to be processed, then send one None
        # per worker (presumably the shutdown sentinel -- confirm in
        # ProcessUpload) and wait for the threads to exit.
        q_request.join()
        for i in range(threads_count):
            q_request.put(None)
        for th in upload_thread:
            th.join()
    if is_large:
        return InputFileBig(file_id, part_count, file_name)
    else:
        # NOTE(review): the hash *object* is passed as ``md5``, not a
        # digest -- confirm this is what InputSizedFile expects.
        return InputSizedFile(
            file_id,
            part_count, file_name, md5=hash_md5, size=file_size
        )
def upload_file(self, file_path, part_size_kb=None, file_name=None, progress_callback=None):
    """Upload the file at ``file_path`` part by part and return its handle.

    :param file_path: Path of the file to upload.
    :param part_size_kb: Size of each part in KB (at most 512, and a whole
                         number of KB). When falsy, it is chosen from the
                         file size.
    :param file_name: Name stored in the returned handle. When falsy, the
                      base name of ``file_path`` is used.
    :param progress_callback: Optional callable invoked after every
                              successfully uploaded part with the number of
                              bytes read so far and the total file size.
    :return: ``InputFileBig`` for files over 10MB, otherwise ``InputFile``
             carrying the MD5 hex digest of the contents.
    :raises ValueError: If the part size is invalid or a part upload
                        reports failure.
    """
    file_size = path.getsize(file_path)
    part_size_kb = part_size_kb or get_appropiate_part_size(file_size)

    if part_size_kb > 512:
        raise ValueError('The part size must be less or equal to 512KB')

    part_size = int(part_size_kb * 1024)
    if part_size % 1024:
        raise ValueError('The part size must be evenly divisible by 1024')

    # Files above 10MB use the "big file" requests and need no checksum.
    is_large = file_size > 10 * 1024 * 1024
    # Ceiling division: the final part may be shorter than part_size.
    part_count = (file_size + part_size - 1) // part_size

    file_id = utils.generate_random_long()
    hash_md5 = md5()

    with open(file_path, 'rb') as stream:
        for index in range(part_count):
            chunk = stream.read(part_size)
            request = (
                SaveBigFilePartRequest(file_id, index, part_count, chunk)
                if is_large
                else SaveFilePartRequest(file_id, index, chunk)
            )

            # A falsy result means the part was rejected.
            if not self.invoke(request):
                raise ValueError('Could not upload file part #{}'.format(index))

            if not is_large:
                hash_md5.update(chunk)
            if progress_callback:
                progress_callback(stream.tell(), file_size)

    file_name = file_name or path.basename(file_path)

    if is_large:
        return InputFileBig(id=file_id, parts=part_count, name=file_name)
    return InputFile(id=file_id, parts=part_count, name=file_name,
                     md5_checksum=hash_md5.hexdigest())