def printTransferProgress(*args, **kwargs):
    """Prints transfer progress using the cumulative format if that has been configured on the running
    thread, otherwise prints transfer directly to standard out as normal. This function should
    be imported instead of utils.printTransferProgress in locations that may be part of a cumulative
    transfer (i.e. a Synapse sync)."""

    if is_active():
        _thread_local.cumulative_transfer_progress.printTransferProgress(*args, **kwargs)
    else:
        utils.printTransferProgress(*args, **kwargs)
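For context, a minimal sketch of the thread-local plumbing this helper relies on; is_active and the cumulative_transfer_progress attribute mirror the snippet above, while the setup itself is illustrative rather than the library's confirmed API:

import threading

_thread_local = threading.local()

def is_active():
    # a cumulative transfer is active when the running thread has a
    # tracker stored on its thread-local state (see the snippet above)
    return getattr(_thread_local, 'cumulative_transfer_progress', None) is not None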
Example #2
    def printTransferProgress(self,
                              transferred,
                              toBeTransferred,
                              prefix='',
                              postfix='',
                              isBytes=True,
                              dt=None,
                              previouslyTransferred=0):
        """
        Parameters match those of synapseclient.core.utils.printTransferProgress.
        """

        if not sys.stdout.isatty():
            return

        with self._lock:
            if toBeTransferred == 0 or float(transferred) / toBeTransferred >= 1:
                # if the individual transfer is complete then we pass through the print
                # to the underlying utility method which will print a complete 100%
                # progress bar on a newline.
                utils.printTransferProgress(
                    transferred,
                    toBeTransferred,
                    prefix=prefix,
                    postfix=postfix,
                    isBytes=isBytes,
                    dt=dt,
                    previouslyTransferred=previouslyTransferred)

            # in order to know how much of the transferred data is newly transferred
            # we subtract the previously reported amount. this assumes that the printing
            # of the progress for any particular transfer is always conducted by the same
            # thread, which is true for all current transfer implementations.
            self._total_transferred += (transferred -
                                        _thread_local.thread_transferred)
            _thread_local.thread_transferred = transferred

            cumulative_dt = time.time() - self._start
            rate = self._total_transferred / float(cumulative_dt)
            rate = '(%s/s)' % utils.humanizeBytes(rate) if isBytes else rate

            # we print a rotating tick with each update
            self._spinner.print_tick()

            sys.stdout.write(
                f"{self._label} {utils.humanizeBytes(self._total_transferred)} {rate}"
            )
            sys.stdout.flush()
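To see why subtracting the previously reported amount yields only the newly transferred bytes, consider two threads that each report growing cumulative totals; a simplified single-threaded simulation of the accounting above (illustrative only):

# simplified simulation of the per-thread delta accounting
total_transferred = 0
previously_reported = {'A': 0, 'B': 0}  # stands in for _thread_local.thread_transferred

for thread, transferred in [('A', 100), ('B', 50), ('A', 250), ('B', 80)]:
    total_transferred += transferred - previously_reported[thread]
    previously_reported[thread] = transferred

assert total_transferred == 330  # 250 + 80, regardless of interleaving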
Example #3
 def run(self):
     try:
         # write data to file
         with open(self.path, 'wb') as file_write:
             for start, data in self.data_queue:
                 file_write.seek(start)
                 file_write.write(data)
                 self.transfer_status.transferred += len(data)
                 printTransferProgress(
                     self.transfer_status.transferred,
                     self.transfer_status.total_bytes_to_be_transferred,
                     'Downloading ',
                     os.path.basename(self.path),
                     dt=self.transfer_status.elapsed_time())
     except OSError:
         self.data_queue.close()
         raise
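The writer above only assumes that data_queue yields (start offset, bytes) pairs, in any order, and exposes close(); a hypothetical stand-in illustrating that contract (not the library's queue implementation):

class FakeDataQueue:
    # hypothetical stand-in: chunks may arrive out of order, and
    # file_write.seek(start) places each one at its correct offset
    def __iter__(self):
        yield 5, b'world'
        yield 0, b'hello'

    def close(self):
        pass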
Example #4
def _upload_chunk(part, completed, status, syn, filename, get_chunk_function,
                  fileSize, partSize, t0, expired, bytes_already_uploaded=0):
    partNumber = part["partNumber"]
    url = part["uploadPresignedUrl"]

    syn.logger.debug("uploading this part of the upload: %s" % part)
    # if the upload url for another worker has expired, assume that this one also expired and return early
    with expired.get_lock():
        if expired.value:
            syn.logger.debug("part %s is returning early because other parts have already expired" % partNumber)
            return

    try:
        chunk = get_chunk_function(partNumber, partSize)
        syn.logger.debug("start upload part %s" % partNumber)
        _put_chunk(url, chunk, syn.debug)
        syn.logger.debug("PUT upload of part %s complete" % partNumber)
        # compute the MD5 for the chunk
        md5 = hashlib.md5()
        md5.update(chunk)

        # confirm that part got uploaded
        syn.logger.debug("contacting Synapse to complete part %s" % partNumber)
        add_part_response = _add_part(syn, uploadId=status.uploadId,
                                      partNumber=partNumber, partMD5Hex=md5.hexdigest())
        # if part was successfully uploaded, increment progress
        if add_part_response["addPartState"] == "ADD_SUCCESS":
            syn.logger.debug("finished contacting Synapse about adding part %s" % partNumber)
            with completed.get_lock():
                completed.value += len(chunk)
            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename, dt=time.time()-t0,
                                  previouslyTransferred=bytes_already_uploaded)
        else:
            syn.logger.debug("did not successfully add part %s" % partNumber)
    except Exception as ex1:
        if isinstance(ex1, SynapseHTTPError) and ex1.response.status_code == 403:
            syn.logger.debug("The pre-signed upload URL for part %s has expired. Restarting upload...\n" % partNumber)
            with expired.get_lock():
                if not expired.value:
                    warnings.warn("The pre-signed upload URL has expired. Restarting upload...\n")
                    expired.value = True
            return
        else:
            # if we are not in verbose debug mode we will swallow the error and retry
            syn.logger.debug("Encountered an exception: %s. Retrying...\n" % str(type(ex1)), exc_info=True)
Example #5
 def progress(*args, **kwargs):
     # count the completed call under the shared lock, print overall
     # progress, then delegate to the wrapped function
     with lock:
         completed.value += 1
     printTransferProgress(completed.value, totalCalls, prefix, postfix,
                           isBytes)
     return func(*args, **kwargs)
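This closure depends on lock, completed, totalCalls, prefix, postfix, isBytes, and func from its enclosing scope. A self-contained illustration of the same wrap-and-count pattern using plain threading primitives (illustrative, not the library's code):

import threading

def make_progress_wrapper(func, total_calls):
    lock = threading.Lock()
    state = {'completed': 0}

    def progress(*args, **kwargs):
        with lock:
            state['completed'] += 1
            done = state['completed']
        print('%d/%d calls completed' % (done, total_calls))
        return func(*args, **kwargs)

    return progress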
Example #6
def _multipart_upload(syn, filename, contentType, get_chunk_function, md5, fileSize, 
                      partSize=None, storageLocationId=None, **kwargs):
    """
    Multipart Upload.

    :param syn:                 a Synapse object
    :param filename:            a string containing the base filename
    :param contentType:         contentType_
    :param get_chunk_function:  a function that takes a part number and size and returns the bytes of that chunk of the
                                file
    :param md5:                 the file's MD5 as hex.
    :param fileSize:            total number of bytes
    :param partSize:            number of bytes per part. Minimum 5MB.
    :param storageLocationId:   an id indicating where the file should be stored, retrieved from Synapse's
                                UploadDestination

    :return: a MultipartUploadStatus_ object

    Keyword arguments are passed down to :py:func:`_start_multipart_upload`.

    .. _MultipartUploadStatus:
     http://docs.synapse.org/rest/org/sagebionetworks/repo/model/file/MultipartUploadStatus.html
    .. _contentType: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    """
    partSize = calculate_part_size(fileSize, partSize, MIN_PART_SIZE, MAX_NUMBER_OF_PARTS)
    status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                     storageLocationId=storageLocationId, **kwargs)

    # only force restart once
    kwargs['forceRestart'] = False

    completedParts = count_completed_parts(status.partsState)
    # bytes that were previously uploaded before the current upload began. This variable is set only once
    previously_completed_bytes = min(completedParts * partSize, fileSize)
    syn.logger.debug("file partitioned into size: %s" % partSize)
    syn.logger.debug("current multipart-upload status: %s" % status)
    syn.logger.debug("previously completed %d parts, estimated %d bytes" % (completedParts, previously_completed_bytes))
    time_upload_started = time.time()
    retries = 0
    mp = pool_provider.get_pool()
    try:
        while retries < MAX_RETRIES:
            syn.logger.debug("Started retry loop for multipart_upload. Currently %d/%d retries"
                             % (retries, MAX_RETRIES))
            # keep track of the number of bytes uploaded so far
            completed = pool_provider.get_value('d', min(completedParts * partSize, fileSize))
            expired = pool_provider.get_value(ctypes.c_bool, False)

            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename)

            def chunk_upload(part): return _upload_chunk(part, completed=completed, status=status,
                                                         syn=syn, filename=filename,
                                                         get_chunk_function=get_chunk_function,
                                                         fileSize=fileSize, partSize=partSize, t0=time_upload_started,
                                                         expired=expired,
                                                         bytes_already_uploaded=previously_completed_bytes)

            syn.logger.debug("fetching pre-signed urls and mapping to Pool")
            url_generator = _get_presigned_urls(syn, status.uploadId, find_parts_to_upload(status.partsState))
            mp.map(chunk_upload, url_generator)
            syn.logger.debug("completed pooled upload")

            # Check if there are still parts
            status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                             storageLocationId=storageLocationId, **kwargs)
            oldCompletedParts, completedParts = completedParts, count_completed_parts(status.partsState)
            progress = (completedParts > oldCompletedParts)
            retries = retries+1 if not progress else retries
            syn.logger.debug("progress made in this loop? %s" % progress)

            # Are we done, yet?
            if completed.value >= fileSize:
                try:
                    syn.logger.debug("attempting to finalize multipart upload because completed.value >= filesize"
                                     " ({completed} >= {size})".format(completed=completed.value, size=fileSize))
                    status = _complete_multipart_upload(syn, status.uploadId)
                    if status.state == "COMPLETED":
                        break
                except Exception as ex1:
                    syn.logger.error("Attempt to complete the multipart upload failed with exception %s %s"
                                     % (type(ex1), ex1))
                    syn.logger.debug("multipart upload failed:", exc_info=True)
    finally:
        mp.terminate()
    if status["state"] != "COMPLETED":
        raise SynapseError("Upload {id} did not complete. Try again.".format(id=status["uploadId"]))

    return status
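calculate_part_size is not shown in this example; given the documented constraints (a 5MB minimum per part and a cap on the number of parts), a plausible sketch of the computation (the 10000-part cap is an assumption mirroring S3-style limits, not a confirmed constant):

import math

MIN_PART_SIZE = 5 * 1024 * 1024  # 5MB minimum, per the docstring above
MAX_NUMBER_OF_PARTS = 10000      # assumed cap, mirroring S3-style limits

def calculate_part_size(file_size, part_size=None):
    # the part size must satisfy both the per-part minimum and the cap
    # on total parts, so grow it until the whole file fits
    return max(part_size or 0, MIN_PART_SIZE, math.ceil(file_size / MAX_NUMBER_OF_PARTS))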
Example #7
 def progress_callback(bytes):
     # accumulate the newly transferred byte count under the shared lock,
     # then re-render the progress bar with the running total
     with bytes_transferred.get_lock():
         bytes_transferred.value += bytes
         printTransferProgress(bytes_transferred.value, file_size, prefix=prefix, postfix=filename,
                               dt=time.time() - t0, previouslyTransferred=0)
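Like the other callbacks here, progress_callback closes over names from its enclosing scope; bytes_transferred must support get_lock(), i.e. a multiprocessing.Value. An illustrative setup and driver loop (hypothetical, for demonstration only):

import ctypes
import multiprocessing
import time

file_size = 4 * 8192
prefix, filename = 'Downloading', 'example.bin'
t0 = time.time()
bytes_transferred = multiprocessing.Value(ctypes.c_ulonglong, 0)

# hypothetical driver: report each chunk's size as it lands
for chunk in [b'x' * 8192] * 4:
    progress_callback(len(chunk))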