def annotate_with_retry(**kwargs):
    """Annotates submission status with retry to account for conflicting
    submission updates

    Args:
        **kwargs: Takes same parameters as annotate_submission
    """
    # with_retry must be given a callable, so wrap the call in a lambda rather
    # than invoking annotate_submission before the retry wrapper runs
    with_retry(lambda: annotate_submission(**kwargs),
               wait=3,
               retries=10,
               retry_status_codes=[412, 429, 500, 502, 503, 504],
               verbose=True)
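# Hedged usage sketch (the keyword arguments below are hypothetical; annotate_with_retry
# simply forwards whatever annotate_submission accepts, per the docstring above):
#
# annotate_with_retry(syn=syn,
#                     submissionid="9700000",
#                     annotation_dict={"prediction_file_status": "SCORED"})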
def _get_response_with_retry(presigned_url_provider, start: int, end: int) -> Response:
    session = _get_thread_session()
    range_header = {'Range': f'bytes={start}-{end}'}

    def session_get():
        return session.get(presigned_url_provider.get_info().url, headers=range_header)

    response = None
    cause = None
    try:
        # currently when doing a range request to AWS we retry on anything other than a 206.
        # this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable condition)
        # but for now matching previous behavior.
        response = with_retry(
            session_get,
            expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,),
            retry_errors=RETRYABLE_CONNECTION_ERRORS,
            retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS,
        )
    except Exception as ex:
        cause = ex

    if not response or response.status_code != HTTPStatus.PARTIAL_CONTENT:
        raise SynapseError(
            f'Could not download the file: {presigned_url_provider.get_info().file_name},'
            f' please try again.') from cause

    return start, response
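# Illustrative sketch only (not part of the module above): the same ranged-read pattern
# without the presigned_url_provider/thread-session machinery. The url argument is a
# placeholder assumption; with_retry re-issues the GET until a 206 Partial Content
# response arrives or its retries are exhausted.
import requests
from http import HTTPStatus


def fetch_range_sketch(url: str, start: int, end: int) -> bytes:
    session = requests.Session()
    response = with_retry(
        lambda: session.get(url, headers={'Range': f'bytes={start}-{end}'}),
        expected_status_codes=(HTTPStatus.PARTIAL_CONTENT,),
    )
    return response.content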
def test_boto_upload__acl(self):
    """Verify when we store a Synapse object using boto we apply a
    bucket-owner-full-control ACL to the object"""
    bucket_name, _ = get_aws_env()
    _, folder, storage_location_id = self._configure_storage_location(sts_enabled=True)

    file_contents = str(uuid.uuid4())
    upload_file = self._make_temp_file(contents=file_contents)

    # mock the sts setting so that we upload this file using boto regardless of test configuration
    with mock.patch.object(self.syn, 'use_boto_sts_transfers',
                           new_callable=mock.PropertyMock(return_value=True)):
        file = self.syn.store(File(path=upload_file.name, parent=folder))

    s3_read_client = boto3.client('s3', **get_aws_env()[1])
    bucket_acl = s3_read_client.get_bucket_acl(Bucket=bucket_name)
    bucket_grantee = bucket_acl['Grants'][0]['Grantee']
    assert bucket_grantee['Type'] == 'CanonicalUser'
    bucket_owner_id = bucket_grantee['ID']

    # with_retry to avoid eventual consistency issues reading the newly put S3 object
    object_acl = with_retry(
        lambda: s3_read_client.get_object_acl(
            Bucket=bucket_name,
            Key=file['_file_handle']['key']
        ),
        retry_exceptions=[s3_read_client.exceptions.NoSuchKey]
    )

    grants = object_acl['Grants']
    assert len(grants) == 1
    grant = grants[0]
    grantee = grant['Grantee']
    assert grantee['Type'] == 'CanonicalUser'
    assert grantee['ID'] == bucket_owner_id
    assert grant['Permission'] == 'FULL_CONTROL'
def command_annotate_submission_with_json(syn, args):
    """Annotate a Synapse submission with a json file. This function is used
    by a ChallengeWorkflowTemplates tool.

    >>> challengeutils annotatesubmission 12345 annotations.json --to_public
    """
    # is_private defaults to True; the CLI exposes the inverse flag --to_public,
    # so passing --to_public sets is_private to False.
    is_private = not args.to_public
    with_retry(lambda: annotations.annotate_submission_with_json(
                   syn, args.submissionid,
                   args.annotation_values,
                   is_private=is_private,
                   force=args.force),
               wait=3,
               retries=10,
               retry_status_codes=[412, 429, 500, 502, 503, 504],
               verbose=True)
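# Quick illustration (hypothetical, mirroring the inversion described above): the CLI
# exposes --to_public, and is_private is simply its negation.
def _to_public_inversion_sketch():
    from argparse import Namespace

    args = Namespace(to_public=True)
    assert (not args.to_public) is False   # --to_public given: annotations are not private

    args = Namespace(to_public=False)
    assert (not args.to_public) is True    # default: annotations stay private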
def test_with_retry():
    retryParams = {"retries": 3, "wait": 0}
    response = MagicMock()
    function = MagicMock()
    function.return_value = response

    # -- No failures --
    response.status_code.__eq__.side_effect = lambda x: x == 250
    with_retry(function, verbose=True, **retryParams)
    assert function.call_count == 1

    # -- Always fail --
    response.status_code.__eq__.side_effect = lambda x: x == 503
    with_retry(function, verbose=True, **retryParams)
    assert function.call_count == 1 + 4

    # -- Fail then succeed --
    thirdTimes = [3, 2, 1]

    def theCharm(x):
        if x == 503:
            count = thirdTimes.pop()
            return count != 3
        return x == 503

    response.status_code.__eq__.side_effect = theCharm
    with_retry(function, verbose=True, **retryParams)
    assert function.call_count == 1 + 4 + 3

    # -- Retry with an error message --
    retryErrorMessages = ["Foo"]
    retryParams["retry_errors"] = retryErrorMessages
    response.status_code.__eq__.side_effect = lambda x: x == 500
    response.headers.__contains__.reset_mock()
    response.headers.__contains__.side_effect = lambda x: x == 'content-type'
    response.headers.get.side_effect = \
        lambda x, default_value: "application/json" if x == 'content-type' else None
    response.json.return_value = {"reason": retryErrorMessages[0]}
    with_retry(function, **retryParams)
    assert response.headers.get.called
    assert function.call_count == 1 + 4 + 3 + 4

    # -- Propagate an error up --
    print("Expect a SynapseError: Bar")

    def foo():
        raise SynapseError("Bar")

    function.side_effect = foo
    pytest.raises(SynapseError, with_retry, function, **retryParams)
    assert function.call_count == 1 + 4 + 3 + 4 + 1
@contextmanager
def _retry_pysftp_connection(*conn_args, **conn_kwargs):
    pysftp = SFTPWrapper._attempt_import_sftp()

    # handle error reading banner which can mean an overloaded SSH server,
    # especially in the context of our integration tests if there are multiple concurrent
    # test suites running against the test micro instance
    # https://stackoverflow.com/a/29225295
    sftp = with_retry(lambda: pysftp.Connection(*conn_args, **conn_kwargs),
                      retry_errors=['Error reading SSH protocol banner'])
    try:
        yield sftp
    finally:
        sftp.close()
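# Hedged usage sketch (host and credentials are placeholders): because the function
# yields the connection as a context manager, callers get a connection that was retried
# past transient "Error reading SSH protocol banner" failures and is always closed
# afterwards.
#
# with _retry_pysftp_connection('sftp.example.org', username='user', password='secret') as sftp:
#     sftp.get('remote/path.txt', 'local/path.txt')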
def test_with_retry__no_status_code():
    """Verify that with_retry can also be used on any function, even one whose
    return value doesn't have a status_code. In that case it is used just for
    its exception retrying and backoff capabilities."""
    x = 0

    def fn():
        nonlocal x
        x += 1
        if x < 2:
            raise ValueError('not yet')
        return x

    response = with_retry(fn, retry_exceptions=[ValueError])
    assert 2 == response
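# Related sketch (parameter names drawn from the calls elsewhere in this section):
# with_retry can wrap any flaky callable, pairing retry_exceptions with wait/retries
# to control how many attempts are made and how long to back off between them.
def _flaky_read_sketch(path):
    def read_once():
        with open(path) as f:
            return f.read()

    return with_retry(read_once, retry_exceptions=[OSError], wait=1, retries=5)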
def test_with_retry__expected_status_code():
    """Verify retrying until a response with an expected_status_code is returned"""
    non_matching_response = MagicMock(spec=Response)
    non_matching_response.status_code = 200

    matching_response = MagicMock(spec=Response)
    matching_response.status_code = 201

    fn = MagicMock()
    fn.side_effect = [
        non_matching_response,
        matching_response,
    ]

    response = with_retry(fn, expected_status_codes=[201])
    assert response == matching_response
@pytest.mark.parametrize('values', (None, [], tuple()))  # falsey sequence values (assumed parametrization)
def test_with_retry__empty_status_codes(values):
    """Verify that passing some Falsey values for the various sequence args is ok"""
    response = MagicMock(spec=Response)
    response.status_code = 200

    fn = MagicMock()
    fn.return_value = response

    # no unexpected exceptions etc should be raised
    returned_response = with_retry(
        fn,
        retry_status_codes=values,
        expected_status_codes=values,
        retry_exceptions=values,
        retry_errors=values,
    )

    assert returned_response == response
def _handle_part(self, part_number):
    with self._lock:
        if self._aborted:
            # this upload attempt has already been aborted
            # so we short circuit the attempt to upload this part
            raise SynapseUploadAbortedException(
                "Upload aborted, skipping part {}".format(part_number)
            )

        part_url, signed_headers = self._pre_signed_part_urls.get(part_number)

    session = self._get_thread_session()

    # obtain the body (i.e. the upload bytes) for the given part number.
    body = self._part_request_body_provider_fn(part_number) if self._part_request_body_provider_fn else None
    part_size = len(body) if body else 0
    for retry in range(2):
        def put_fn():
            return session.put(part_url, body, headers=signed_headers)

        try:
            # use our backoff mechanism here, we have encountered 500s on puts to AWS signed urls
            response = with_retry(put_fn, retry_exceptions=[requests.exceptions.ConnectionError])
            _raise_for_status(response)

            # completed upload part to s3 successfully
            break

        except SynapseHTTPError as ex:
            if ex.response.status_code == 403 and retry < 1:
                # we interpret this to mean our pre_signed url expired.
                self._syn.logger.debug(
                    "The pre-signed upload URL for part {} has expired. "
                    "Refreshing urls and retrying.\n".format(part_number)
                )

                # we refresh all the urls and obtain this part's
                # specific url for the retry
                part_url, signed_headers = self._refresh_pre_signed_part_urls(
                    part_number,
                    part_url,
                )

            else:
                raise

    md5_hex = self._md5_fn(body, response)

    # now tell synapse that we uploaded that part successfully
    self._syn.restPUT(
        "/file/multipart/{upload_id}/add/{part_number}?partMD5Hex={md5}"
        .format(
            upload_id=self._upload_id,
            part_number=part_number,
            md5=md5_hex,
        ),
        requests_session=session,
        endpoint=self._syn.fileHandleEndpoint
    )

    # remove so future batch pre_signed url fetches will exclude this part
    with self._lock:
        del self._pre_signed_part_urls[part_number]

    return part_number, part_size
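# Hedged sketch of what the injected _md5_fn above presumably computes (an assumption;
# the real callable is supplied by the uploader): the hex MD5 digest of the uploaded
# part body, which Synapse validates via the partMD5Hex query parameter in the add call.
import hashlib


def _md5_fn_sketch(body: bytes, response) -> str:
    return hashlib.md5(body).hexdigest()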
def test_sts_external_storage_location(self):
    """Test creating and using an external STS storage location.
    A custom storage location is created with sts enabled,
    a file is uploaded directly via boto using STS credentials,
    a file handle is created for it,
    and then it is read directly via boto using STS read credentials.
    """
    bucket_name, _ = get_aws_env()
    _, folder, storage_location_id = self._configure_storage_location(sts_enabled=True)

    sts_read_creds = self.syn.get_sts_storage_token(folder['id'], 'read_only', output_format='boto')
    sts_write_creds = self.syn.get_sts_storage_token(folder['id'], 'read_write', output_format='boto')

    s3_read_client = boto3.client('s3', **sts_read_creds)
    s3_write_client = boto3.client('s3', **sts_write_creds)

    # put an object directly using our sts creds
    file_contents = 'saved using sts'
    temp_file = self._make_temp_file(contents=file_contents, suffix='.txt')

    remote_key = f"{folder.name}/sts_saved"

    # verify that the read credentials are in fact read only
    with pytest.raises(Exception) as ex_cm:
        s3_read_client.upload_file(
            Filename=temp_file.name,
            Bucket=bucket_name,
            Key=remote_key,
        )
    assert 'Access Denied' in str(ex_cm.value)

    # now create a file directly in s3 using our STS creds
    s3_write_client.upload_file(
        Filename=temp_file.name,
        Bucket=bucket_name,
        Key=remote_key,
        ExtraArgs={'ACL': 'bucket-owner-full-control'},
    )

    # now read the file using our read credentials
    # S3 is not ACID so we add a retry here to try to ensure our
    # object will be available before we try to create the handle
    with_retry(lambda: s3_read_client.get_object(Bucket=bucket_name, Key=remote_key),
               retry_exceptions=[s3_read_client.exceptions.NoSuchKey])

    # create an external file handle so we can read it via synapse
    file_handle = self.syn.create_external_s3_file_handle(
        bucket_name,
        remote_key,
        temp_file.name,
        storage_location_id=storage_location_id,
    )
    file = File(parentId=folder['id'], dataFileHandleId=file_handle['id'])
    file_entity = self.syn.store(file)

    # now should be able to retrieve the file via synapse
    retrieved_file_entity = self.syn.get(file_entity['id'])
    with open(retrieved_file_entity.path, 'r') as f:
        assert file_contents == f.read()