def printTransferProgress(*args, **kwargs):
    """Prints transfer progress using the cumulative format if that has been configured
    on the running thread, otherwise prints transfer progress directly to standard out
    as normal. This function should be imported instead of utils.printTransferProgress
    in locations that may be part of a cumulative transfer (i.e. a Synapse sync)."""
    if is_active():
        _thread_local.cumulative_transfer_progress.printTransferProgress(*args, **kwargs)
    else:
        utils.printTransferProgress(*args, **kwargs)
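
# A minimal sketch (not from the source) of the thread-local gate implied above. The
# assumption is that is_active() simply checks whether a cumulative progress object has
# been attached to the current thread, so only threads participating in a sync opt in.
import threading

_demo_thread_local = threading.local()  # hypothetical stand-in for _thread_local

def _demo_is_active():
    # active only when a cumulative progress object was attached to this thread
    return getattr(_demo_thread_local, 'cumulative_transfer_progress', None) is not None
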
def printTransferProgress(self, transferred, toBeTransferred, prefix='', postfix='',
                          isBytes=True, dt=None, previouslyTransferred=0):
    """Parameters match those of synapseclient.core.utils.printTransferProgress."""
    if not sys.stdout.isatty():
        return

    with self._lock:
        if toBeTransferred == 0 or float(transferred) / toBeTransferred >= 1:
            # if the individual transfer is complete then we pass through the print
            # to the underlying utility method which will print a complete 100%
            # progress bar on a newline.
            utils.printTransferProgress(transferred, toBeTransferred, prefix=prefix,
                                        postfix=postfix, isBytes=isBytes, dt=dt,
                                        previouslyTransferred=previouslyTransferred)

        # in order to know how much of the transferred data is newly transferred
        # we subtract the previously reported amount. this assumes that the printing
        # of the progress for any particular transfer is always conducted by the same
        # thread, which is true for all current transfer implementations.
        self._total_transferred += (transferred - _thread_local.thread_transferred)
        _thread_local.thread_transferred = transferred

        cumulative_dt = time.time() - self._start
        rate = self._total_transferred / float(cumulative_dt)
        rate = '(%s/s)' % utils.humanizeBytes(rate) if isBytes else rate

        # we print a rotating tick with each update
        self._spinner.print_tick()

        sys.stdout.write(f"{self._label} {utils.humanizeBytes(self._total_transferred)} {rate}")
        sys.stdout.flush()
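
# Illustration (hypothetical names, not from the source) of the delta accounting above:
# each thread reports a cumulative figure for its own transfer, so the shared total must
# add only the per-call delta, with the previous report remembered per-thread.
import threading

_tl = threading.local()
_total = 0
_total_lock = threading.Lock()

def report(transferred):
    global _total
    with _total_lock:
        previous = getattr(_tl, 'thread_transferred', 0)
        _total += transferred - previous  # count only the newly transferred bytes
        _tl.thread_transferred = transferred
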
def run(self):
    try:
        # write data to file
        with open(self.path, 'wb') as file_write:
            for start, data in self.data_queue:
                file_write.seek(start)
                file_write.write(data)
                self.transfer_status.transferred += len(data)
                printTransferProgress(self.transfer_status.transferred,
                                      self.transfer_status.total_bytes_to_be_transferred,
                                      'Downloading ', os.path.basename(self.path),
                                      dt=self.transfer_status.elapsed_time())
    except OSError:
        self.data_queue.close()
        raise
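
# Sketch of the (start, data) protocol the writer above consumes, assuming download
# threads enqueue each chunk tagged with its byte offset so chunks may arrive out of
# order; the writer seeks before every write. write_chunks is a hypothetical helper.
import io

def write_chunks(fileobj, chunks):
    # chunks: iterable of (start_offset, bytes), possibly out of order
    written = 0
    for start, data in chunks:
        fileobj.seek(start)
        fileobj.write(data)
        written += len(data)
    return written

buf = io.BytesIO()
assert write_chunks(buf, [(4, b'5678'), (0, b'1234')]) == 8
assert buf.getvalue() == b'12345678'
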
def _upload_chunk(part, completed, status, syn, filename, get_chunk_function,
                  fileSize, partSize, t0, expired, bytes_already_uploaded=0):
    partNumber = part["partNumber"]
    url = part["uploadPresignedUrl"]
    syn.logger.debug("uploading this part of the upload: %s" % part)
    # if the upload url for another worker has expired, assume that this one
    # has also expired and return early
    with expired.get_lock():
        if expired.value:
            syn.logger.debug("part %s is returning early because other parts have already expired" % partNumber)
            return
    try:
        chunk = get_chunk_function(partNumber, partSize)
        syn.logger.debug("start upload part %s" % partNumber)
        _put_chunk(url, chunk, syn.debug)
        syn.logger.debug("PUT upload of part %s complete" % partNumber)

        # compute the MD5 for the chunk
        md5 = hashlib.md5()
        md5.update(chunk)

        # confirm that the part got uploaded
        syn.logger.debug("contacting Synapse to complete part %s" % partNumber)
        add_part_response = _add_part(syn, uploadId=status.uploadId,
                                      partNumber=partNumber, partMD5Hex=md5.hexdigest())

        # if the part was successfully uploaded, increment progress
        if add_part_response["addPartState"] == "ADD_SUCCESS":
            syn.logger.debug("finished contacting Synapse about adding part %s" % partNumber)
            with completed.get_lock():
                completed.value += len(chunk)
            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename,
                                  dt=time.time() - t0, previouslyTransferred=bytes_already_uploaded)
        else:
            syn.logger.debug("did not successfully add part %s" % partNumber)
    except Exception as ex1:
        if isinstance(ex1, SynapseHTTPError) and ex1.response.status_code == 403:
            syn.logger.debug("The pre-signed upload URL for part %s has expired. Restarting upload...\n"
                             % partNumber)
            with expired.get_lock():
                if not expired.value:
                    warnings.warn("The pre-signed upload URL has expired. Restarting upload...\n")
                    expired.value = True
            return
        # If we are not in verbose debug mode we will swallow the error and retry.
        else:
            syn.logger.debug("Encountered an exception: %s. Retrying...\n" % str(type(ex1)), exc_info=True)
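
# Sketch (hypothetical names, not the module's API) of the shared 'expired' flag pattern
# used above: a ctypes-backed multiprocessing.Value lets one worker signal the rest that
# the batch of pre-signed URLs has gone stale, so remaining workers bail out early
# instead of attempting PUTs that are guaranteed to fail with a 403.
import ctypes
import multiprocessing

expired_flag = multiprocessing.Value(ctypes.c_bool, False)

def should_abort(flag):
    with flag.get_lock():  # the lock guards the read against a concurrent write
        return flag.value

def mark_expired(flag):
    with flag.get_lock():
        first = not flag.value  # only the first worker announces the restart
        flag.value = True
    return first
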
def progress(*args, **kwargs):
    with lock:
        completed.value += 1
        printTransferProgress(completed.value, totalCalls, prefix, postfix, isBytes)
    return func(*args, **kwargs)
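
# A plausible sketch of the closure context the wrapper above relies on (lock, completed,
# totalCalls, func, prefix, postfix and isBytes are free variables there). The factory
# below is hypothetical; it shows the decorator-style pattern of counting each call as
# one unit of progress before delegating to the wrapped function.
import multiprocessing

def make_progress_wrapper(func, totalCalls, prefix='', postfix='', isBytes=False):
    lock = multiprocessing.Lock()
    completed = multiprocessing.Value('i', 0)

    def progress(*args, **kwargs):
        with lock:
            completed.value += 1
            # printTransferProgress is the cumulative-aware wrapper defined above
            printTransferProgress(completed.value, totalCalls, prefix, postfix, isBytes)
        return func(*args, **kwargs)

    return progress
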
def _multipart_upload(syn, filename, contentType, get_chunk_function, md5, fileSize,
                      partSize=None, storageLocationId=None, **kwargs):
    """
    Multipart Upload.

    :param syn:                a Synapse object
    :param filename:           a string containing the base filename
    :param contentType:        contentType_
    :param get_chunk_function: a function that takes a part number and size and returns
                               the bytes of that chunk of the file
    :param md5:                the part's MD5 as hex.
    :param fileSize:           total number of bytes
    :param partSize:           number of bytes per part. Minimum 5MB.
    :param storageLocationId:  an id indicating where the file should be stored,
                               retrieved from Synapse's UploadDestination

    :return: a MultipartUploadStatus_ object

    Keyword arguments are passed down to :py:func:`_start_multipart_upload`.

    .. _MultipartUploadStatus: http://docs.synapse.org/rest/org/sagebionetworks/repo/model/file/MultipartUploadStatus.html
    .. _contentType: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    """
    partSize = calculate_part_size(fileSize, partSize, MIN_PART_SIZE, MAX_NUMBER_OF_PARTS)
    status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                     storageLocationId=storageLocationId, **kwargs)

    # only force restart once
    kwargs['forceRestart'] = False

    completedParts = count_completed_parts(status.partsState)
    # bytes that were previously uploaded before the current upload began. This variable is set only once
    previously_completed_bytes = min(completedParts * partSize, fileSize)
    syn.logger.debug("file partitioned into size: %s" % partSize)
    syn.logger.debug("current multipart-upload status: %s" % status)
    syn.logger.debug("previously completed %d parts, estimated %d bytes"
                     % (completedParts, previously_completed_bytes))
    time_upload_started = time.time()
    retries = 0
    mp = pool_provider.get_pool()
    try:
        while retries < MAX_RETRIES:
            syn.logger.debug("Started retry loop for multipart_upload. Currently %d/%d retries"
                             % (retries, MAX_RETRIES))
            # keep track of the number of bytes uploaded so far
            completed = pool_provider.get_value('d', min(completedParts * partSize, fileSize))
            expired = pool_provider.get_value(ctypes.c_bool, False)

            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename)

            def chunk_upload(part):
                return _upload_chunk(part, completed=completed, status=status, syn=syn,
                                     filename=filename, get_chunk_function=get_chunk_function,
                                     fileSize=fileSize, partSize=partSize, t0=time_upload_started,
                                     expired=expired, bytes_already_uploaded=previously_completed_bytes)

            syn.logger.debug("fetching pre-signed urls and mapping to Pool")
            url_generator = _get_presigned_urls(syn, status.uploadId,
                                                find_parts_to_upload(status.partsState))
            mp.map(chunk_upload, url_generator)
            syn.logger.debug("completed pooled upload")

            # Check if there are still parts
            status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                             storageLocationId=storageLocationId, **kwargs)

            oldCompletedParts, completedParts = completedParts, count_completed_parts(status.partsState)
            progress = (completedParts > oldCompletedParts)
            retries = retries + 1 if not progress else retries
            syn.logger.debug("progress made in this loop? %s" % progress)

            # Are we done, yet?
            if completed.value >= fileSize:
                try:
                    syn.logger.debug("attempting to finalize multipart upload because completed.value >= fileSize"
                                     " ({completed} >= {size})".format(completed=completed.value, size=fileSize))
                    status = _complete_multipart_upload(syn, status.uploadId)
                    if status.state == "COMPLETED":
                        break
                except Exception as ex1:
                    syn.logger.error("Attempt to complete the multipart upload failed with exception %s %s"
                                     % (type(ex1), ex1))
                    syn.logger.debug("multipart upload failed:", exc_info=True)
    finally:
        mp.terminate()

    if status["state"] != "COMPLETED":
        raise SynapseError("Upload {id} did not complete. Try again.".format(id=status["uploadId"]))

    return status
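
# Hedged sketch of the part-size arithmetic behind the calculate_part_size call above.
# Assumptions: the chosen part size must be at least the 5MB minimum named in the
# docstring, and large enough that the file fits within the part-count cap (the cap
# value below is illustrative, not taken from the source).
MIN_PART_SIZE_DEMO = 5 * 1024 * 1024  # 5MB minimum, per the docstring above
MAX_PARTS_DEMO = 10000                # assumed part-count cap

def demo_calculate_part_size(file_size, part_size=None):
    part_size = part_size or MIN_PART_SIZE_DEMO
    # grow the part size until the whole file fits within the part-count cap
    min_for_cap = (file_size + MAX_PARTS_DEMO - 1) // MAX_PARTS_DEMO  # integer ceil
    return max(part_size, MIN_PART_SIZE_DEMO, min_for_cap)

# e.g. a 100GB file forces parts above the 5MB floor:
# (100 * 1024**3) / 10000 rounds up to 10,737,419 bytes, roughly 10.7MB per part
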
def progress_callback(bytes):
    with bytes_transferred.get_lock():
        bytes_transferred.value += bytes
        printTransferProgress(bytes_transferred.value, file_size, prefix=prefix, postfix=filename,
                              dt=time.time() - t0, previouslyTransferred=0)
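
# Sketch of how a callback like the one above is typically wired up (names below are
# hypothetical): the enclosing scope supplies a shared counter plus transfer metadata,
# and the transfer layer invokes the callback with the byte count of each chunk.
import ctypes
import multiprocessing
import time

def make_progress_callback(file_size, filename, prefix=''):
    bytes_transferred = multiprocessing.Value(ctypes.c_ulonglong, 0)
    t0 = time.time()

    def progress_callback(nbytes):
        with bytes_transferred.get_lock():
            bytes_transferred.value += nbytes
            printTransferProgress(bytes_transferred.value, file_size, prefix=prefix,
                                  postfix=filename, dt=time.time() - t0, previouslyTransferred=0)

    return progress_callback
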