def collect_errors_and_fail(syn): """Pulls error traces from the error queue and fails if the queue is not empty.""" failures = [] for i in range(syn.test_errors.qsize()): failures.append(syn.test_errors.get()) if len(failures) > 0: raise SynapseError('\n' + '\n'.join(failures))
def _get_response_with_retry(presigned_url_provider, start: int, end: int) -> "typing.Tuple[int, Response]":
    """Issue a ranged GET for bytes [start, end] of the presigned url, with retries.

    :param presigned_url_provider: object whose ``get_info()`` returns the presigned url and
        file name. ``get_info()`` is invoked on every attempt so a refreshed url (if the
        provider rotates expired urls) is used — do not hoist it out of ``session_get``.
    :param start: first byte offset of the requested range (inclusive)
    :param end: last byte offset of the requested range (inclusive)
    :return: a tuple ``(start, response)`` — the starting offset is returned alongside the
        response so callers can match each response to its range.
        (Fixed: the annotation previously claimed a bare ``Response`` was returned.)
    :raises SynapseError: if no 206 Partial Content response could be obtained
    """
    session = _get_thread_session()
    range_header = {'Range': f'bytes={start}-{end}'}

    def session_get():
        return session.get(presigned_url_provider.get_info().url, headers=range_header)

    response = None
    cause = None
    try:
        # currently when doing a range request to AWS we retry on anything other than a 206.
        # this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable condition)
        # but for now matching previous behavior.
        response = with_retry(
            session_get,
            expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,),
            retry_errors=RETRYABLE_CONNECTION_ERRORS,
            retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS,
        )
    except Exception as ex:
        # preserved and chained onto the SynapseError below via `from cause`
        cause = ex

    if not response or response.status_code != HTTPStatus.PARTIAL_CONTENT:
        raise SynapseError(
            f'Could not download the file: {presigned_url_provider.get_info().file_name},'
            f' please try again.') from cause

    return start, response
def _get_response_with_retry(presigned_url_provider, start: int, end: int) -> "typing.Tuple[int, Response]":
    """Issue a ranged GET for bytes [start, end] of the presigned url, retrying until a
    206 Partial Content response is received or MAX_RETRIES attempts have been made.

    :param presigned_url_provider: object whose ``get_info()`` returns the presigned url
        and file name (re-fetched on each attempt so a refreshed url is used)
    :param start: first byte offset of the requested range (inclusive)
    :param end: last byte offset of the requested range (inclusive)
    :return: a tuple ``(start, response)`` — the starting offset is returned alongside the
        streamed response so callers can match each response to its range.
        (Fixed: the annotation previously claimed a bare ``Response`` was returned.)
    :raises SynapseError: if no 206 response is obtained within MAX_RETRIES attempts
    """
    session = _get_thread_session()
    range_header = {'Range': f'bytes={start}-{end}'}
    response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True)

    # try request until successful or out of retries
    try_counter = 1
    while response.status_code != HTTPStatus.PARTIAL_CONTENT:
        if try_counter >= MAX_RETRIES:
            # release the final failed streamed response before giving up
            response.close()
            raise SynapseError(
                f'Could not download the file: {presigned_url_provider.get_info().file_name},'
                f' please try again.')
        # close the failed streamed response before retrying so its connection is
        # returned to the pool instead of leaking (stream=True defers the read)
        response.close()
        response = session.get(presigned_url_provider.get_info().url, headers=range_header, stream=True)
        try_counter += 1

    return start, response
def foo(): raise SynapseError("Bar")
def used(self, target=None, targetVersion=None, wasExecuted=None, url=None, name=None):
    """
    Add a resource used by the activity.

    This method tries to be as permissive as possible. It accepts a string which might be a synapse ID or a URL,
    a synapse entity, a UsedEntity or UsedURL dictionary or a list containing any combination of these.

    In addition, named parameters can be used to specify the fields of either a UsedEntity or a UsedURL.
    If target and optionally targetVersion are specified, create a UsedEntity.
    If url and optionally name are specified, create a UsedURL.

    It is an error to specify both target/targetVersion parameters and url/name parameters in the same call.
    To add multiple UsedEntities and UsedURLs, make a separate call for each or pass in a list.

    In case of conflicting settings for wasExecuted both inside an object and with a parameter, the parameter wins.
    For example, this UsedURL will have wasExecuted set to False::

        activity.used({'url':'http://google.com', 'name':'Goog', 'wasExecuted':True}, wasExecuted=False)

    Entity examples::

        activity.used('syn12345')
        activity.used(entity)
        activity.used(target=entity, targetVersion=2)
        activity.used(codeEntity, wasExecuted=True)
        activity.used({'reference':{'target':'syn12345', 'targetVersion':1}, 'wasExecuted':False})

    URL examples::

        activity.used('http://mydomain.com/my/awesome/data.RData')
        activity.used(url='http://mydomain.com/my/awesome/data.RData', name='Awesome Data')
        activity.used(url='https://github.com/joe_hacker/code_repo', name='Gnarly hacks', wasExecuted=True)
        activity.used({'url':'https://github.com/joe_hacker/code_repo', 'name':'Gnarly hacks'}, wasExecuted=True)

    List example::

        activity.used(['syn12345', 'syn23456', entity, \
                      {'reference':{'target':'syn100009', 'targetVersion':2}, 'wasExecuted':True}, \
                      'http://mydomain.com/my/awesome/data.RData'])
    """
    # NOTE: the branch ORDER below matters — e.g. is_used_url must be tested
    # before the bare `url` keyword and before is_url(target). Each branch
    # passes locals() so the helper can reject keyword args that do not apply
    # to that resource form.

    # -- A list of targets: recurse once per item; only wasExecuted is forwarded
    if isinstance(target, list):
        badargs = _get_any_bad_args(['targetVersion', 'url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'list of used resources')
        for item in target:
            self.used(item, wasExecuted=wasExecuted)
        return

    # -- UsedEntity: target is already a UsedEntity-shaped dict; use it directly
    elif is_used_entity(target):
        badargs = _get_any_bad_args(['targetVersion', 'url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'dictionary representing a used resource')
        resource = target
        if 'concreteType' not in resource:
            resource['concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedEntity'

    # -- Used URL: target is already a UsedURL-shaped dict; use it directly
    elif is_used_url(target):
        badargs = _get_any_bad_args(['targetVersion', 'url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'URL')
        resource = target
        if 'concreteType' not in resource:
            resource['concreteType'] = 'org.sagebionetworks.repo.model.provenance.UsedURL'

    # -- Synapse Entity: build a reference from the entity's id/version
    elif is_synapse_entity(target):
        badargs = _get_any_bad_args(['url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'Synapse entity')
        reference = {'targetId': target['id']}
        if 'versionNumber' in target:
            reference['targetVersionNumber'] = target['versionNumber']
        # an explicit targetVersion parameter overrides the entity's own version
        if targetVersion:
            reference['targetVersionNumber'] = int(targetVersion)
        resource = {
            'reference': reference,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'
        }

    # -- URL parameter: build a UsedURL from the url/name keyword arguments
    elif url:
        badargs = _get_any_bad_args(['target', 'targetVersion'], locals())
        _raise_incorrect_used_usage(badargs, 'URL')
        resource = {
            'url': url,
            # falls back to `target` as the display name when no name is given
            'name': name if name else target,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedURL'
        }

    # -- URL as a string: the positional target looks like a URL
    elif is_url(target):
        badargs = _get_any_bad_args(['targetVersion'], locals())
        _raise_incorrect_used_usage(badargs, 'URL')
        resource = {
            'url': target,
            'name': name if name else target,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedURL'
        }

    # -- Synapse Entity ID (assuming the string is an ID)
    elif isinstance(target, str):
        badargs = _get_any_bad_args(['url', 'name'], locals())
        _raise_incorrect_used_usage(badargs, 'Synapse entity')
        # handle dotted synapse ids of the form "syn234.4" (id.version)
        vals = target.split('.')
        if not is_synapse_id(vals[0]):
            raise ValueError('%s is not a valid Synapse id' % target)
        if len(vals) == 2:
            # a version in the string must agree with the targetVersion parameter
            if targetVersion and int(targetVersion) != int(vals[1]):
                raise ValueError('Two conflicting versions for %s were specified' % target)
            targetVersion = int(vals[1])
        reference = {'targetId': vals[0]}
        if targetVersion:
            reference['targetVersionNumber'] = int(targetVersion)
        resource = {
            'reference': reference,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'
        }

    else:
        raise SynapseError('Unexpected parameters in call to Activity.used().')

    # Set wasExecuted
    if wasExecuted is None:
        # Default to False
        if 'wasExecuted' not in resource:
            resource['wasExecuted'] = False
    else:
        # wasExecuted parameter overrides setting in an object
        resource['wasExecuted'] = wasExecuted

    # Add the used resource to the activity
    self['used'].append(resource)
def _multipart_upload(syn, filename, contentType, get_chunk_function, md5, fileSize, partSize=None,
                      storageLocationId=None, **kwargs):
    """
    Multipart Upload.

    :param syn: a Synapse object
    :param filename: a string containing the base filename
    :param contentType: contentType_
    :param get_chunk_function: a function that takes a part number and size and returns the bytes of that chunk
                               of the file
    :param md5: the part's MD5 as hex.
    :param fileSize: total number of bytes
    :param partSize: number of bytes per part. Minimum 5MB.
    :param storageLocationId: a id indicating where the file should be stored. retrieved from Synapse's
                              UploadDestination
    :return: a MultipartUploadStatus_ object

    Keyword arguments are passed down to :py:func:`_start_multipart_upload`.

    .. _MultipartUploadStatus:
       http://docs.synapse.org/rest/org/sagebionetworks/repo/model/file/MultipartUploadStatus.html
    .. _contentType: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    """
    partSize = calculate_part_size(fileSize, partSize, MIN_PART_SIZE, MAX_NUMBER_OF_PARTS)
    # starting (or resuming) the upload returns the current status, including
    # which parts the server already has from a previous attempt
    status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                     storageLocationId=storageLocationId, **kwargs)

    # only force restart once: subsequent _start_multipart_upload calls inside
    # the retry loop must resume, not wipe progress
    kwargs['forceRestart'] = False

    completedParts = count_completed_parts(status.partsState)
    # bytes that were previously uploaded before the current upload began. This variable is set only once
    previously_completed_bytes = min(completedParts * partSize, fileSize)
    syn.logger.debug("file partitioned into size: %s" % partSize)
    syn.logger.debug("current multipart-upload status: %s" % status)
    syn.logger.debug("previously completed %d parts, estimated %d bytes"
                     % (completedParts, previously_completed_bytes))
    time_upload_started = time.time()
    retries = 0
    mp = pool_provider.get_pool()
    try:
        while retries < MAX_RETRIES:
            syn.logger.debug("Started retry loop for multipart_upload. Currently %d/%d retries"
                             % (retries, MAX_RETRIES))
            # keep track of the number of bytes uploaded so far
            # (shared values so pool workers can update/read them; 'd' = double)
            completed = pool_provider.get_value('d', min(completedParts * partSize, fileSize))
            expired = pool_provider.get_value(ctypes.c_bool, False)

            printTransferProgress(completed.value, fileSize, prefix='Uploading', postfix=filename)

            # closure mapped over the pool; captures the shared counters above
            def chunk_upload(part):
                return _upload_chunk(part, completed=completed, status=status, syn=syn, filename=filename,
                                     get_chunk_function=get_chunk_function, fileSize=fileSize,
                                     partSize=partSize, t0=time_upload_started, expired=expired,
                                     bytes_already_uploaded=previously_completed_bytes)

            syn.logger.debug("fetching pre-signed urls and mapping to Pool")
            url_generator = _get_presigned_urls(syn, status.uploadId, find_parts_to_upload(status.partsState))
            mp.map(chunk_upload, url_generator)
            syn.logger.debug("completed pooled upload")

            # Check if there are still parts: re-query the upload status
            # (forceRestart was cleared above, so this resumes)
            status = _start_multipart_upload(syn, filename, md5, fileSize, partSize, contentType,
                                             storageLocationId=storageLocationId, **kwargs)
            oldCompletedParts, completedParts = completedParts, count_completed_parts(status.partsState)
            progress = (completedParts > oldCompletedParts)
            # a retry is only consumed by a loop iteration that made no progress
            retries = retries+1 if not progress else retries
            syn.logger.debug("progress made in this loop? %s" % progress)

            # Are we done, yet?
            if completed.value >= fileSize:
                try:
                    syn.logger.debug("attempting to finalize multipart upload because completed.value >= filesize"
                                     " ({completed} >= {size})".format(completed=completed.value, size=fileSize))
                    status = _complete_multipart_upload(syn, status.uploadId)
                    if status.state == "COMPLETED":
                        break
                except Exception as ex1:
                    # finalization failure is non-fatal here; the while loop
                    # re-checks status and may retry
                    syn.logger.error("Attempt to complete the multipart upload failed with exception %s %s"
                                     % (type(ex1), ex1))
                    syn.logger.debug("multipart upload failed:", exc_info=True)
    finally:
        mp.terminate()

    # NOTE(review): status is read both as an attribute (status.state) and as a
    # mapping (status["state"]) — presumably a dict-like object supporting both;
    # confirm against the MultipartUploadStatus wrapper.
    if status["state"] != "COMPLETED":
        raise SynapseError("Upload {id} did not complete. Try again.".format(id=status["uploadId"]))

    return status