def collect_errors_and_fail(syn):
    """Pulls error traces from the error queue and fails if the queue is not empty."""
    failures = []
    for i in range(syn.test_errors.qsize()):
        failures.append(syn.test_errors.get())
    if len(failures) > 0:
        raise SynapseError('\n' + '\n'.join(failures))
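

# A minimal sketch of how the error queue above might be fed from concurrent test
# workers. It assumes `syn.test_errors` is a standard `queue.Queue` and that
# `SynapseError` is importable; `run_concurrent_task` and `run_all_and_check` are
# hypothetical names used only to illustrate the pattern of collecting tracebacks
# in worker threads instead of raising in them.
import queue
import threading
import traceback


def run_concurrent_task(syn, task):
    """Run `task`, recording any traceback on the shared error queue instead of raising."""
    try:
        task()
    except Exception:
        syn.test_errors.put(traceback.format_exc())


def run_all_and_check(syn, tasks):
    syn.test_errors = queue.Queue()  # assumed setup; normally done by the test fixture
    threads = [threading.Thread(target=run_concurrent_task, args=(syn, task)) for task in tasks]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    # raise a single SynapseError carrying every collected traceback
    collect_errors_and_fail(syn)

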
def _get_response_with_retry(presigned_url_provider, start: int,
                             end: int) -> Response:
    session = _get_thread_session()
    range_header = {'Range': f'bytes={start}-{end}'}

    def session_get():
        return session.get(presigned_url_provider.get_info().url,
                           headers=range_header)

    response = None
    cause = None
    try:
        # Currently, when doing a range request to AWS, we retry on anything other than a 206.
        # This seems a bit excessive (some 4xx statuses suggest a non-retryable condition),
        # but for now it matches previous behavior.
        response = with_retry(
            session_get,
            expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,),
            retry_errors=RETRYABLE_CONNECTION_ERRORS,
            retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS,
        )
    except Exception as ex:
        cause = ex

    if not response or response.status_code != HTTPStatus.PARTIAL_CONTENT:
        raise SynapseError(
            f'Could not download the file: {presigned_url_provider.get_info().file_name},'
            f' please try again.') from cause

    return start, response
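

# A rough sketch of how the ranged helper above might be driven to download a file in
# parallel. The part size, the thread pool, and the offset-seeking writes are
# illustrative assumptions (`_download_in_parts` is not part of the client); the real
# download machinery also handles progress reporting and checksum verification.
import concurrent.futures


def _download_in_parts(presigned_url_provider, dest_path, file_size, part_size=8 * 1024 * 1024):
    ranges = [(start, min(start + part_size, file_size) - 1)
              for start in range(0, file_size, part_size)]

    # pre-allocate the destination file so each part can be written at its own offset
    with open(dest_path, 'wb') as f:
        f.truncate(file_size)

    def fetch_and_write(byte_range):
        start, end = byte_range
        offset, response = _get_response_with_retry(presigned_url_provider, start, end)
        with open(dest_path, 'r+b') as f:
            f.seek(offset)
            f.write(response.content)

    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        # list() forces the map so any worker exception propagates here
        list(executor.map(fetch_and_write, ranges))

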
def _get_response_with_retry(presigned_url_provider, start: int, end: int) -> Response:
    session = _get_thread_session()
    range_header = {'Range': f'bytes={start}-{end}'}
    response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True)
    # try request until successful or out of retries
    try_counter = 1
    while response.status_code != HTTPStatus.PARTIAL_CONTENT:
        if try_counter >= MAX_RETRIES:
            raise SynapseError(
                f'Could not download the file: {presigned_url_provider.get_info().file_name},'
                f' please try again.')
        response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True)
        try_counter += 1
    return start, response
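

# A small sketch of consuming the streamed variant above: because the request is made
# with stream=True, the body can be written out in chunks rather than held in memory.
# `_write_part`, the chunk size, and the assumption that the destination file already
# exists at full size are all illustrative, not part of the client.
def _write_part(presigned_url_provider, dest_path, start, end):
    offset, response = _get_response_with_retry(presigned_url_provider, start, end)
    with open(dest_path, 'r+b') as f:  # destination assumed to be pre-allocated
        f.seek(offset)
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)

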
def foo():
    raise SynapseError("Bar")
    def used(self,
             target=None,
             targetVersion=None,
             wasExecuted=None,
             url=None,
             name=None):
        """
        Add a resource used by the activity.

        This method tries to be as permissive as possible. It accepts a string which might be a Synapse ID or a URL,
        a Synapse entity, a UsedEntity or UsedURL dictionary, or a list containing any combination of these.

        In addition, named parameters can be used to specify the fields of either a UsedEntity or a UsedURL.
        If target and optionally targetVersion are specified, create a UsedEntity.
        If url and optionally name are specified, create a UsedURL.

        It is an error to specify both target/targetVersion parameters and url/name parameters in the same call.
        To add multiple UsedEntities and UsedURLs, make a separate call for each or pass in a list.

        In case of conflicting settings for wasExecuted both inside an object and with a parameter, the parameter wins.
        For example, this UsedURL will have wasExecuted set to False::

            activity.used({'url':'http://google.com', 'name':'Goog', 'wasExecuted':True}, wasExecuted=False)

        Entity examples::

            activity.used('syn12345')
            activity.used(entity)
            activity.used(target=entity, targetVersion=2)
            activity.used(codeEntity, wasExecuted=True)
            activity.used({'reference':{'target':'syn12345', 'targetVersion':1}, 'wasExecuted':False})

        URL examples::

            activity.used('http://mydomain.com/my/awesome/data.RData')
            activity.used(url='http://mydomain.com/my/awesome/data.RData', name='Awesome Data')
            activity.used(url='https://github.com/joe_hacker/code_repo', name='Gnarly hacks', wasExecuted=True)
            activity.used({'url':'https://github.com/joe_hacker/code_repo', 'name':'Gnarly hacks'}, wasExecuted=True)

        List example::

            activity.used(['syn12345', 'syn23456', entity,
                           {'reference':{'target':'syn100009', 'targetVersion':2}, 'wasExecuted':True},
                           'http://mydomain.com/my/awesome/data.RData'])
        """
        # -- A list of targets
        if isinstance(target, list):
            badargs = _get_any_bad_args(['targetVersion', 'url', 'name'],
                                        locals())
            _raise_incorrect_used_usage(badargs, 'list of used resources')

            for item in target:
                self.used(item, wasExecuted=wasExecuted)
            return

        # -- UsedEntity
        elif is_used_entity(target):
            badargs = _get_any_bad_args(['targetVersion', 'url', 'name'],
                                        locals())
            _raise_incorrect_used_usage(
                badargs, 'dictionary representing a used resource')

            resource = target
            if 'concreteType' not in resource:
                resource['concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedEntity'

        # -- Used URL
        elif is_used_url(target):
            badargs = _get_any_bad_args(['targetVersion', 'url', 'name'],
                                        locals())
            _raise_incorrect_used_usage(badargs, 'URL')

            resource = target
            if 'concreteType' not in resource:
                resource['concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedURL'

        # -- Synapse Entity
        elif is_synapse_entity(target):
            badargs = _get_any_bad_args(['url', 'name'], locals())
            _raise_incorrect_used_usage(badargs, 'Synapse entity')

            reference = {'targetId': target['id']}
            if 'versionNumber' in target:
                reference['targetVersionNumber'] = target['versionNumber']
            if targetVersion:
                reference['targetVersionNumber'] = int(targetVersion)
            resource = {
                'reference': reference,
                'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'
            }
        # -- URL parameter
        elif url:
            badargs = _get_any_bad_args(['target', 'targetVersion'], locals())
            _raise_incorrect_used_usage(badargs, 'URL')

            resource = {
                'url': url,
                'name': name if name else target,
                'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedURL'
            }

        # -- URL as a string
        elif is_url(target):
            badargs = _get_any_bad_args(['targetVersion'], locals())
            _raise_incorrect_used_usage(badargs, 'URL')
            resource = {
                'url': target,
                'name': name if name else target,
                'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedURL'
            }

        # -- Synapse Entity ID (assuming the string is an ID)
        elif isinstance(target, str):
            badargs = _get_any_bad_args(['url', 'name'], locals())
            _raise_incorrect_used_usage(badargs, 'Synapse entity')
            vals = target.split('.')  # handle Synapse ids of the form syn234.4
            if not is_synapse_id(vals[0]):
                raise ValueError('%s is not a valid Synapse id' % target)
            if len(vals) == 2:
                if targetVersion and int(targetVersion) != int(vals[1]):
                    raise ValueError(
                        'Two conflicting versions for %s were specified' %
                        target)
                targetVersion = int(vals[1])
            reference = {'targetId': vals[0]}
            if targetVersion:
                reference['targetVersionNumber'] = int(targetVersion)
            resource = {
                'reference': reference,
                'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'
            }
        else:
            raise SynapseError(
                'Unexpected parameters in call to Activity.used().')

        # Set wasExecuted
        if wasExecuted is None:
            # Default to False
            if 'wasExecuted' not in resource:
                resource['wasExecuted'] = False
        else:
            # wasExecuted parameter overrides setting in an object
            resource['wasExecuted'] = wasExecuted

        # Add the used resource to the activity
        self['used'].append(resource)
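

# `used()` relies on a pair of small validation helpers defined at module level in
# activity.py. Their exact implementations are not shown here; the sketch below is an
# assumption about their behavior, included only to make the method above easier to
# read in isolation.
def _get_any_bad_args(badargs, dictionary):
    """Return the names in `badargs` that were actually supplied (i.e. are not None)."""
    return [arg for arg in badargs if dictionary.get(arg) is not None]


def _raise_incorrect_used_usage(badargs, targetType):
    """Raise if arguments that do not apply to this kind of used resource were given."""
    if badargs:
        raise SynapseError(
            'The parameter(s) %s cannot be used in combination with a %s.'
            % (', '.join(badargs), targetType))

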
def _multipart_upload(syn, filename, contentType, get_chunk_function, md5, fileSize, 
                      partSize=None, storageLocationId=None, **kwargs):
    """
    Multipart Upload.

    :param syn:                 a Synapse object
    :param filename:            a string containing the base filename
    :param contentType:         contentType_
    :param get_chunk_function:  a function that takes a part number and size and returns the bytes of that chunk of the
                                file
    :param md5:                 the file's MD5 as a hex string
    :param fileSize:            total number of bytes
    :param partSize:            number of bytes per part. Minimum 5MB.
    :param storageLocationId:   an id indicating where the file should be stored, retrieved from Synapse's
                                UploadDestination

    :return: a MultipartUploadStatus_ object

    Keyword arguments are passed down to :py:func:`_start_multipart_upload`.

    .. _MultipartUploadStatus:
        http://docs.synapse.org/rest/org/sagebionetworks/repo/model/file/MultipartUploadStatus.html
    .. _contentType: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    """
    partSize = calculate_part_size(fileSize, partSize, MIN_PART_SIZE, MAX_NUMBER_OF_PARTS)
    status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                     storageLocationId=storageLocationId, **kwargs)

    # only force restart once
    kwargs['forceRestart'] = False

    completedParts = count_completed_parts(status.partsState)
    # bytes that were previously uploaded before the current upload began. This variable is set only once
    previously_completed_bytes = min(completedParts * partSize, fileSize)
    syn.logger.debug("file partitioned into size: %s" % partSize)
    syn.logger.debug("current multipart-upload status: %s" % status)
    syn.logger.debug("previously completed %d parts, estimated %d bytes" % (completedParts, previously_completed_bytes))
    time_upload_started = time.time()
    retries = 0
    mp = pool_provider.get_pool()
    try:
        while retries < MAX_RETRIES:
            syn.logger.debug("Started retry loop for multipart_upload. Currently %d/%d retries"
                             % (retries, MAX_RETRIES))
            # keep track of the number of bytes uploaded so far
            completed = pool_provider.get_value('d', min(completedParts * partSize, fileSize))
            expired = pool_provider.get_value(ctypes.c_bool, False)

            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename)

            def chunk_upload(part):
                return _upload_chunk(part, completed=completed, status=status,
                                     syn=syn, filename=filename,
                                     get_chunk_function=get_chunk_function,
                                     fileSize=fileSize, partSize=partSize,
                                     t0=time_upload_started, expired=expired,
                                     bytes_already_uploaded=previously_completed_bytes)

            syn.logger.debug("fetching pre-signed urls and mapping to Pool")
            url_generator = _get_presigned_urls(syn, status.uploadId, find_parts_to_upload(status.partsState))
            mp.map(chunk_upload, url_generator)
            syn.logger.debug("completed pooled upload")

            # Check if there are still parts
            status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                             storageLocationId=storageLocationId, **kwargs)
            oldCompletedParts, completedParts = completedParts, count_completed_parts(status.partsState)
            progress = (completedParts > oldCompletedParts)
            retries = retries+1 if not progress else retries
            syn.logger.debug("progress made in this loop? %s" % progress)

            # Are we done, yet?
            if completed.value >= fileSize:
                try:
                    syn.logger.debug("attempting to finalize multipart upload because completed.value >= filesize"
                                     " ({completed} >= {size})".format(completed=completed.value, size=fileSize))
                    status = _complete_multipart_upload(syn, status.uploadId)
                    if status.state == "COMPLETED":
                        break
                except Exception as ex1:
                    syn.logger.error("Attempt to complete the multipart upload failed with exception %s %s"
                                     % (type(ex1), ex1))
                    syn.logger.debug("multipart upload failed:", exc_info=True)
    finally:
        mp.terminate()
    if status["state"] != "COMPLETED":
        raise SynapseError("Upload {id} did not complete. Try again.".format(id=status["uploadId"]))

    return status
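

# A hedged sketch of how _multipart_upload might be invoked directly. The chunk reader,
# MD5 computation, and content-type guess below are illustrative assumptions; the
# client's public upload entry points normally take care of these details.
import hashlib
import mimetypes
import os


def _example_upload(syn, filepath):
    file_size = os.path.getsize(filepath)
    content_type = mimetypes.guess_type(filepath)[0] or 'application/octet-stream'
    with open(filepath, 'rb') as f:
        md5_hex = hashlib.md5(f.read()).hexdigest()

    def get_chunk(part_number, part_size):
        # part numbers are 1-based; return the bytes for the requested part
        with open(filepath, 'rb') as f:
            f.seek((part_number - 1) * part_size)
            return f.read(part_size)

    return _multipart_upload(syn, os.path.basename(filepath), content_type,
                             get_chunk, md5_hex, file_size)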