def start_download(download_from_url_links, update_download_progress=None):
    """Download a URL with worker threads, then stitch the parts together.

    The worker threads and their info records come from
    ``Downloader.get_download_threads``. All workers are started and joined,
    each worker's status history and the elapsed download time are logged,
    and finally ``stitch_temp_files`` is run on its own thread and waited for.

    Args:
        download_from_url_links: Forwarded to
            ``Downloader.get_download_threads`` as the download URL.
        update_download_progress: Optional progress callback forwarded to
            ``Downloader.get_download_threads``.
    """
    started_at = time.time()

    downloader = Downloader()
    workers, parts_info = downloader.get_download_threads(
        download_from_url_links,
        update_download_progress=update_download_progress,
    )

    # Start everything first, then wait, so the workers run concurrently.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    for part_info in parts_info:
        history = ", ".join(
            status.name for status in part_info.thread_status_progression
        )
        log.current_datetime(
            f"Thread:{part_info.id}, progression history {history}",
            allow_display=False,
        )

    log.current_datetime(log.elapsed_time(started_at, __name__))

    stitcher = threading.Thread(target=stitch_temp_files, args=[parts_info])
    stitcher.start()
    stitcher.join()
def get_content_length_from_url(url):
    """Return the size in bytes that the server reports for ``url``.

    Opens ``url`` and reads its ``Content-Length`` response header. The
    length is logged when it could be determined.

    Args:
        url: The URL to query.

    Returns:
        int: The advertised content length, or ``0`` when it cannot be
        determined: the request failed, the header is missing, or the header
        is not a valid integer.
    """
    try:
        with urllib.request.urlopen(url) as meta:
            # ``headers.get`` is case-insensitive and yields None when the
            # header is absent, instead of raising KeyError.
            raw_length = meta.headers.get("Content-Length")
    except urllib.error.URLError:
        # URLError is the base class of HTTPError, so this also covers
        # failures that never produce an HTTP status (unreachable host,
        # missing local file).
        return 0

    try:
        content_length = int(raw_length)
    except (TypeError, ValueError):
        # Header missing (None) or malformed: length is unknown.
        return 0

    log.current_datetime(f"Content-Length:{format_bytes(content_length)}")
    return content_length
def stitch_temp_files(download_threads_info):
    """Concatenate the downloaded part files into the final file.

    Parts are appended in list order to the file named after the first
    entry's ``official_name`` inside ``file.get_download_dir_for(...)``.
    Each part that is copied is flagged ``did_stitch`` and its temp file is
    deleted. The result is then checked against the size reported by
    ``get_content_length_from_url`` and the verdict is logged.

    Args:
        download_threads_info: Info records of the download workers, in the
            order their parts must appear in the final file. Each record
            provides ``official_name``, ``content_file_name``,
            ``did_download`` and ``latest_draw_result_url``.
    """
    start_time = time.time()

    if not download_threads_info:
        # Nothing was planned, so there is no file name to stitch into.
        log.current_datetime(
            log.elapsed_time(start_time, "File Stitching, Failed! File is Not Usable"))
        return

    first_info = download_threads_info[0]
    complete_downloaded_file_name = first_info.official_name
    path_to_stitch = os.path.join(
        file.get_download_dir_for(complete_downloaded_file_name),
        complete_downloaded_file_name)
    temp_folder = file.get_temp_folder(complete_downloaded_file_name)

    # Copy in large blocks; one byte per read/write is needlessly slow.
    chunk_size = 1024 * 1024
    all_parts_stitched = True

    # NOTE(review): append mode extends a file that already exists at this
    # path instead of replacing it - confirm that is intended.
    with open(path_to_stitch, "a+b") as complete_downloaded_file:
        for download_thread_info in download_threads_info:
            if not download_thread_info.did_download:
                # A part that was never downloaded leaves a hole in the file.
                all_parts_stitched = False
                continue

            temporal_file_path = os.path.join(
                temp_folder, download_thread_info.content_file_name)
            try:
                with open(temporal_file_path, "rb") as temp_file:
                    chunk = temp_file.read(chunk_size)
                    while chunk:
                        complete_downloaded_file.write(chunk)
                        chunk = temp_file.read(chunk_size)
                download_thread_info.did_stitch = True
                # Delete only after the part file has been closed.
                os.remove(temporal_file_path)
            except IOError:
                all_parts_stitched = False
                print(
                    f"{download_thread_info.content_file_name} did not Stitch!"
                )

    try:
        os.rmdir(temp_folder)
    except OSError:
        # The folder still holds part files that were not stitched (or is
        # already gone); that must not prevent the verdict below.
        log.current_datetime(
            f"Temp folder {temp_folder} could not be removed", allow_display=False)

    downloaded_file_size = os.stat(path_to_stitch).st_size
    original_file_size = get_content_length_from_url(first_info.latest_draw_result_url)
    is_complete_downloaded_file_stitched = (
        all_parts_stitched and downloaded_file_size >= original_file_size)

    if is_complete_downloaded_file_stitched:
        message = "Completed File Stitching, File Now Available"
    else:
        message = "File Stitching, Failed! File is Not Usable"
    log.current_datetime(log.elapsed_time(start_time, message))
def download(self, download_thread_info, update_download_progress=None):
    """Fetch one byte range of the remote file into its temp part file.

    The range is ``fromByte``-``toByte`` of ``download_thread_info``, both
    ends inclusive as in an HTTP ``Range`` header. The part is written to
    ``content_file_name`` inside the folder returned by
    ``file.create_temp_folder_for(official_name)``. If that file already
    holds data, only the missing tail of the range is requested and appended.

    The ``Range`` header is placed on a per-call copy of ``self.headers``
    before the request is sent. ``self.headers`` itself is left untouched,
    so workers running in parallel cannot overwrite each other's range.

    Outcome is reported on ``download_thread_info``:
      * ``did_download`` is set to True on success (or when the part was
        already complete on disk) and False when an ``IOError`` interrupts
        the transfer.
      * ``on_change_thread_status`` receives RESUMING, PROCESSING_TASK, DONE,
        INTERRUPTED or NOT_ALLOWED_TO_PRECESS_TASK as the work progresses.

    Args:
        download_thread_info: ``DownloadThreadInfo`` describing the part.
        update_download_progress: Optional callable
            ``(percentage, thread_id)``. It is invoked on a short-lived
            thread each time the integer percentage changes.
    """
    statuses = DownloadThreadInfo.DownloadThreadStatus
    thread_id = download_thread_info.id

    if download_thread_info.thread_status == statuses.NOT_ALLOWED_TO_PRECESS_TASK:
        return

    part_start = download_thread_info.fromByte
    part_end = download_thread_info.toByte

    def skip_completed_part():
        # Every byte of the range is already on disk: nothing to fetch, but
        # the part must still be picked up when the parts are stitched.
        download_thread_info.did_download = True
        download_thread_info.on_change_thread_status(
            statuses.NOT_ALLOWED_TO_PRECESS_TASK)

    def notify(percentage_, id_):
        # Progress reporting is best-effort and must never abort a download.
        try:
            update_download_progress(percentage_, id_)
        except RuntimeError:
            pass

    try:
        temp_folder = file.create_temp_folder_for(download_thread_info.official_name)
        part_path = os.path.join(temp_folder, download_thread_info.content_file_name)

        # Resume detection looks at the file that is actually written below.
        bytes_on_disk = os.stat(part_path).st_size if os.path.exists(part_path) else 0
        resume_from = part_start + bytes_on_disk

        if bytes_on_disk > 0:
            log.current_datetime(f"Thread:{thread_id} temp file exists")
            if resume_from > part_end:
                skip_completed_part()
                return
            str_from = readable.format_bytes(resume_from)
            str_to = readable.format_bytes(part_end)
            message = f"temp file will resume...at bytes={str_from}-{str_to}"
            log.current_datetime(message, allow_display=False)

        # Per-request copy: the Range must be set BEFORE the request is sent.
        request_headers = dict(self.headers)
        request_headers['Range'] = f'bytes={resume_from}-{part_end}'

        # stream=True keeps the body out of memory until it is iterated.
        with requests.request('GET', download_thread_info.latest_draw_result_url,
                              headers=request_headers, stream=True) as site:
            status_code = site.status_code

            if bytes_on_disk > 0 and status_code == 416:
                # The resume offset is at or past the end of the resource,
                # so the data on disk already reaches the end of the file.
                skip_completed_part()
                return

            if status_code not in (200, 206):
                download_thread_info.on_change_thread_status(
                    statuses.NOT_ALLOWED_TO_PRECESS_TASK)
                message = f"Thread:{thread_id} is not allowed to download!"
                log.current_datetime(message)
                return

            if status_code == 200:
                # The server ignored the Range header: the body is the whole
                # resource from byte 0, so any partial data is discarded.
                log.current_datetime(f"Thread:{thread_id}, temp file will be created...",
                                     allow_display=False)
                part_start = resume_from = 0
                part_end = download_thread_info.maxByte
                bytes_on_disk = 0
                download_thread_info.fromByte = part_start
                download_thread_info.toByte = part_end
            elif bytes_on_disk > 0:
                log.current_datetime(f"""
                    Thread:{thread_id}, temp file will resume...
                    at bytes={readable.format_bytes(resume_from)}-{readable.format_bytes(part_end)}
                    """, allow_display=False)
                download_thread_info.on_change_thread_status(statuses.RESUMING)
            else:
                log.current_datetime(f"Thread:{thread_id}, temp file will be created...",
                                     allow_display=False)

            message_byte_range = (f"{readable.format_bytes(resume_from)}-"
                                  f"{readable.format_bytes(part_end)}")
            message = f"Thread:{thread_id}, temp file will start at... bytes={message_byte_range}"
            log.current_datetime(message, allow_display=False)
            self.downloadCounter += 1
            message = f"Thread:{thread_id}, is allowed to download"
            log.current_datetime(message, allow_display=False)

            # Inclusive range, and never zero so the percentage cannot divide by zero.
            total = max(part_end - part_start + 1, 1)
            done = bytes_on_disk
            last_percentage = None
            const_text = f'Thread:{thread_id} Downloading...'

            download_thread_info.on_change_thread_status(statuses.PROCESSING_TASK)
            # Append only when continuing an existing part; otherwise start clean.
            with open(part_path, "ab" if bytes_on_disk > 0 else "wb") as part_file:
                for chunk in site.iter_content(chunk_size=64 * 1024):
                    if not chunk:
                        continue
                    part_file.write(chunk)
                    done += len(chunk)

                    percentage = min(100, round(done * 100.0 / total))
                    if percentage == last_percentage:
                        continue
                    last_percentage = percentage

                    print(f"\r{const_text}{percentage}%", end="")
                    if update_download_progress is not None:
                        threading.Thread(target=notify,
                                         args=[percentage, thread_id]).start()

        download_thread_info.did_download = True
        download_thread_info.on_change_thread_status(statuses.DONE)
    except IOError:
        # Covers file errors and requests' own exceptions, which derive from IOError.
        download_thread_info.did_download = False
        download_thread_info.on_change_thread_status(statuses.INTERRUPTED)
def get_download_threads(self, url, thread_count=16, update_download_progress=None):
    """Plan the download of ``url`` and build one worker thread per part.

    A ranged probe request decides the strategy. Status 206 with more than
    one part means the file is split into contiguous, non-overlapping byte
    ranges, one per worker. Status 200, or 206 with a single part, means one
    worker fetches the whole file. The threads are created but not started.

    Byte ranges are stored on each ``DownloadThreadInfo`` as ``fromByte`` and
    ``toByte``, both inclusive (HTTP ``Range`` semantics). The last part also
    takes the remainder when the length is not divisible by the part count.

    Args:
        url: The URL to download.
        thread_count: Desired number of parts. Values below 1 are treated
            as 1, and the count is capped at the number of bytes available.
        update_download_progress: Optional progress callback handed to every
            worker.

    Returns:
        tuple: ``(threads, download_threads_info)``, two parallel lists of
        ``threading.Thread`` and ``DownloadThreadInfo``.

    Raises:
        AssertionError: If the probe is answered with a status other than
            200 or 206 (for example 416 Range Not Satisfiable).
    """
    statuses = DownloadThreadInfo.DownloadThreadStatus
    _official_file_name, content_file_name, content_mime = \
        readable.get_content_file_name_and_type(url)
    content_length = readable.get_content_length_from_url(url)

    # At least one part, and never more parts than there are bytes. An
    # unknown length (0) therefore always yields a single part.
    part_count = max(1, min(thread_count, content_length))
    part_size = content_length // part_count
    last_byte = max(content_length - 1, 0)

    probe_last_byte = max(part_size - 1, 0)
    log.current_datetime(f'Range: bytes= 0B - {readable.format_bytes(probe_last_byte)}',
                         allow_display=False)
    self.headers.update({'Range': f'bytes=0-{probe_last_byte}'})
    # stream=True: only the status line is needed, so the body is not downloaded.
    with requests.request('GET', url, headers=self.headers, stream=True) as site:
        status_code = site.status_code

    is_multi_threading_allowed = status_code == 206 and part_count > 1
    is_single_threading_allowed = status_code == 200 or (
        status_code == 206 and not is_multi_threading_allowed)
    if not is_multi_threading_allowed and not is_single_threading_allowed:
        raise AssertionError(f"Server Status not supported {status_code}")

    message = "MultiThreading Allowed!" if is_multi_threading_allowed \
        else "Single Threading Allowed!"
    log.current_datetime(message)

    threads = []
    download_threads_info = []

    if is_multi_threading_allowed:
        for part_id in range(part_count):
            part_info = DownloadThreadInfo(url, part_id, content_file_name, content_mime,
                                           content_length, part_size)
            part_info.fromByte = part_id * part_size
            if part_id == part_count - 1:
                # The last part runs to the end of the file, remainder included.
                part_info.toByte = last_byte
            else:
                # Inclusive end: stop one byte before the next part starts.
                part_info.toByte = part_info.fromByte + part_size - 1
            part_info.on_change_thread_status(statuses.INITIALIZING)
            download_threads_info.append(part_info)
            threads.append(
                threading.Thread(target=self.download,
                                 args=[part_info, update_download_progress]))
    else:
        # NOTE(review): with an unknown length (0) the range below is
        # bytes=0-0; confirm how such downloads should be handled.
        self.headers.update({'Range': f'bytes=0-{last_byte}'})
        part_info = DownloadThreadInfo(url, 0, content_file_name, content_mime,
                                       content_length, content_length)
        part_info.fromByte = 0
        part_info.toByte = last_byte
        part_info.on_change_thread_status(statuses.INITIALIZING)
        download_threads_info.append(part_info)
        threads.append(
            threading.Thread(target=self.download,
                             args=[part_info, update_download_progress]))

    return threads, download_threads_info