Example #1
def annotate_with_retry(**kwargs):
    """Annotates submission status with retry to account for
    conflicting submission updates

    Args:
        **kwargs: Takes same parameters as annotate_submission
    """
    return with_retry(lambda: annotate_submission(**kwargs),
                      wait=3,
                      retries=10,
                      retry_status_codes=[412, 429, 500, 502, 503, 504],
                      verbose=True)
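
with_retry takes a zero-argument callable, which is why the call above wraps annotate_submission in a lambda instead of invoking it eagerly. A minimal usage sketch; the keyword names passed through to annotate_submission are illustrative assumptions:

# Hypothetical call: retries transparently if concurrent updates to the
# submission status cause 412/429 conflicts.
# syn: an authenticated synapseclient.Synapse instance
annotate_with_retry(syn=syn,
                    submissionid="9700000",           # placeholder submission id
                    annotation_dict={"score": 0.9},   # placeholder annotations
                    is_private=False)
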
    def _get_response_with_retry(presigned_url_provider, start: int,
                                 end: int) -> typing.Tuple[int, Response]:
        session = _get_thread_session()
        range_header = {'Range': f'bytes={start}-{end}'}

        def session_get():
            return session.get(presigned_url_provider.get_info().url,
                               headers=range_header)

        response = None
        cause = None
        try:
            # currently when doing a range request to AWS we retry on anything other than a 206.
            # this seems a bit excessive (i.e. some 400 statuses would suggest a non-retryable condition)
            # but for now matching previous behavior.
            response = with_retry(
                session_get,
                expected_status_codes=(HTTPStatus.PARTIAL_CONTENT, ),
                retry_errors=RETRYABLE_CONNECTION_ERRORS,
                retry_exceptions=RETRYABLE_CONNECTION_EXCEPTIONS,
            )
        except Exception as ex:
            cause = ex

        if not response or response.status_code != HTTPStatus.PARTIAL_CONTENT:
            raise SynapseError(
                f'Could not download the file: {presigned_url_provider.get_info().file_name},'
                f' please try again.') from cause

        return start, response
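
Each call above fetches a single byte range; a downloader typically fans these calls out across threads. A rough sketch of that fan-out, where the chunk size, helper name, and executor wiring are illustrative assumptions rather than the library's actual scheduling:

import concurrent.futures

CHUNK_SIZE = 8 * 1024 * 1024  # illustrative 8 MiB parts


def _download_ranges(presigned_url_provider, file_size, max_threads=8):
    """Illustrative fan-out: fetch a file as byte-range chunks in parallel."""
    ranges = [(start, min(start + CHUNK_SIZE, file_size) - 1)
              for start in range(0, file_size, CHUNK_SIZE)]
    parts = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = [executor.submit(_get_response_with_retry,
                                   presigned_url_provider, start, end)
                   for start, end in ranges]
        for future in concurrent.futures.as_completed(futures):
            start, response = future.result()
            parts[start] = response.content
    # reassemble the chunks in offset order
    return b''.join(parts[start] for start in sorted(parts))
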
    def test_boto_upload__acl(self):
        """Verify when we store a Synapse object using boto we apply a bucket-owner-full-control ACL to the object"""
        bucket_name, _ = get_aws_env()
        _, folder, storage_location_id = self._configure_storage_location(
            sts_enabled=True)

        file_contents = str(uuid.uuid4())
        upload_file = self._make_temp_file(contents=file_contents)

        # mock the sts setting so that we upload this file using boto regardless of test configuration
        with mock.patch.object(
                self.syn,
                'use_boto_sts_transfers',
                new_callable=mock.PropertyMock(return_value=True)):
            file = self.syn.store(File(path=upload_file.name, parent=folder))

        s3_read_client = boto3.client('s3', **get_aws_env()[1])
        bucket_acl = s3_read_client.get_bucket_acl(Bucket=bucket_name)
        bucket_grantee = bucket_acl['Grants'][0]['Grantee']
        assert bucket_grantee['Type'] == 'CanonicalUser'
        bucket_owner_id = bucket_grantee['ID']

        # with_retry to guard against read-after-write races following the S3 put
        object_acl = with_retry(
            lambda: s3_read_client.get_object_acl(
                Bucket=bucket_name, Key=file['_file_handle']['key']),
            retry_exceptions=[s3_read_client.exceptions.NoSuchKey])
        grants = object_acl['Grants']
        assert len(grants) == 1
        grant = grants[0]
        grantee = grant['Grantee']
        assert grantee['Type'] == 'CanonicalUser'
        assert grantee['ID'] == bucket_owner_id
        assert grant['Permission'] == 'FULL_CONTROL'
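
get_aws_env above returns the external bucket name plus a dict of boto3 client keyword arguments. A hypothetical sketch of that test helper, with environment variable names that are assumptions:

import os


def get_aws_env():
    """Hypothetical test helper: returns (bucket_name, boto3 client kwargs)."""
    return os.environ['EXTERNAL_S3_BUCKET_NAME'], {
        'aws_access_key_id': os.environ['AWS_ACCESS_KEY_ID'],
        'aws_secret_access_key': os.environ['AWS_SECRET_ACCESS_KEY'],
    }
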
Example #4
def command_annotate_submission_with_json(syn, args):
    """Annotate a Synapse submission with a json file.  This function
    is used by a ChallengeWorkflowTemplates tool.

    >>> challengeutils annotatesubmission 12345 annotations.json --to_public
    """
    # is_private defaults to True; the CLI exposes the inverse flag --to_public,
    # so is_private is simply the negation of args.to_public.
    is_private = not args.to_public
    with_retry(lambda: annotations.annotate_submission_with_json(
        syn,
        args.submissionid,
        args.annotation_values,
        is_private=is_private,
        force=args.force),
               wait=3,
               retries=10,
               retry_status_codes=[412, 429, 500, 502, 503, 504],
               verbose=True)
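
The args namespace above comes from the challengeutils CLI; a rough argparse sketch of the flags this handler reads (option names inferred from the function body, so treat the exact spellings as assumptions):

import argparse


def _build_annotate_parser():
    """Illustrative parser for the annotatesubmission subcommand."""
    parser = argparse.ArgumentParser(prog="challengeutils annotatesubmission")
    parser.add_argument("submissionid", help="Synapse submission id")
    parser.add_argument("annotation_values", help="Path to a JSON file of annotations")
    parser.add_argument("--to_public", action="store_true",
                        help="Make the annotations public (default: private)")
    parser.add_argument("--force", action="store_true",
                        help="Update annotations even if you do not own them")
    return parser
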
def test_with_retry():
    retryParams = {"retries": 3, "wait": 0}
    response = MagicMock()
    function = MagicMock()
    function.return_value = response

    # -- No failures --
    response.status_code.__eq__.side_effect = lambda x: x == 250
    with_retry(function, verbose=True, **retryParams)
    assert function.call_count == 1

    # -- Always fail --
    response.status_code.__eq__.side_effect = lambda x: x == 503
    with_retry(function, verbose=True, **retryParams)
    assert function.call_count == 1 + 4

    # -- Fail then succeed --
    thirdTimes = [3, 2, 1]

    def theCharm(x):
        if x == 503:
            count = thirdTimes.pop()
            return count != 3
        return x == 503

    response.status_code.__eq__.side_effect = theCharm
    with_retry(function, verbose=True, **retryParams)
    assert function.call_count == 1 + 4 + 3

    # -- Retry with an error message --
    retryErrorMessages = ["Foo"]
    retryParams["retry_errors"] = retryErrorMessages
    response.status_code.__eq__.side_effect = lambda x: x == 500
    response.headers.__contains__.reset_mock()
    response.headers.__contains__.side_effect = lambda x: x == 'content-type'
    response.headers.get.side_effect = lambda x, default_value: "application/json" if x == 'content-type' else None
    response.json.return_value = {"reason": retryErrorMessages[0]}
    with_retry(function, **retryParams)
    assert response.headers.get.called
    assert function.call_count == 1 + 4 + 3 + 4

    # -- Propagate an error up --
    print("Expect a SynapseError: Bar")

    def foo():
        raise SynapseError("Bar")

    function.side_effect = foo
    pytest.raises(SynapseError, with_retry, function, **retryParams)
    assert function.call_count == 1 + 4 + 3 + 4 + 1
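
The call-count arithmetic above follows from with_retry making one initial call plus at most `retries` further calls for each failing block. A simplified sketch of that loop, not the library's actual implementation (which also handles retry_errors, retry_exceptions, and expected_status_codes):

import time


def simple_with_retry(fn, retries=3, wait=1, back_off=2,
                      retry_status_codes=(429, 500, 502, 503, 504)):
    """Simplified sketch: call fn, retrying on retryable status codes."""
    response = fn()
    for _ in range(retries):
        status = getattr(response, "status_code", None)
        if status not in retry_status_codes:
            break  # success or a non-retryable status: stop retrying
        time.sleep(wait)
        wait *= back_off  # exponential back-off between attempts
        response = fn()
    return response
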
@contextlib.contextmanager
def _retry_pysftp_connection(*conn_args, **conn_kwargs):
    pysftp = SFTPWrapper._attempt_import_sftp()

    # handle an error reading the SSH banner, which can indicate an overloaded SSH server,
    # especially in the context of our integration tests when multiple concurrent
    # test suites run against the test micro instance
    # https://stackoverflow.com/a/29225295
    sftp = with_retry(lambda: pysftp.Connection(*conn_args, **conn_kwargs),
                      retry_errors=['Error reading SSH protocol banner'])
    try:
        yield sftp
    finally:
        sftp.close()
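
Because the generator above is wrapped as a context manager, callers use it in a `with` block and the connection is closed even if the transfer fails. The host and credentials below are placeholders:

# Hypothetical usage of the retrying SFTP connection
with _retry_pysftp_connection('sftp.example.org',
                              username='user',      # placeholder credentials
                              password='secret') as sftp:
    sftp.get('remote/path/data.csv', 'data.csv')
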
def test_with_retry__no_status_code():
    """Verify that with_retry can also be used on any function
    even whose return values don't have status_codes.
    In that case just for its exception retrying
    and back off capabiliies."""

    x = 0

    def fn():
        nonlocal x
        x += 1
        if x < 2:
            raise ValueError('not yet')
        return x

    response = with_retry(fn, retry_exceptions=[ValueError])
    assert 2 == response
def test_with_retry__expected_status_code():
    """Verify using retry expected_status_codes"""

    non_matching_response = MagicMock(spec=Response)
    non_matching_response.status_code = 200

    matching_response = MagicMock(spec=Response)
    matching_response.status_code = 201

    fn = MagicMock()
    fn.side_effect = [
        non_matching_response,
        matching_response,
    ]

    response = with_retry(fn, expected_status_codes=[201])
    assert response == matching_response
@pytest.mark.parametrize('values', (None, [], tuple()))
def test_with_retry__empty_status_codes(values):
    """Verify that passing falsy values for the various sequence args is ok"""
    response = MagicMock(spec=Response)
    response.status_code = 200

    fn = MagicMock()
    fn.return_value = response

    # no unexpected exceptions etc should be raised
    returned_response = with_retry(
        fn,
        retry_status_codes=values,
        expected_status_codes=values,
        retry_exceptions=values,
        retry_errors=values,
    )
    assert returned_response == response
Example #10
    def _handle_part(self, part_number):
        with self._lock:
            if self._aborted:
                # this upload attempt has already been aborted
                # so we short circuit the attempt to upload this part
                raise SynapseUploadAbortedException(
                    "Upload aborted, skipping part {}".format(part_number)
                )

            part_url, signed_headers = self._pre_signed_part_urls.get(part_number)

        session = self._get_thread_session()

        # obtain the body (i.e. the upload bytes) for the given part number.
        body = self._part_request_body_provider_fn(part_number) if self._part_request_body_provider_fn else None
        part_size = len(body) if body else 0
        for retry in range(2):
            def put_fn():
                return session.put(part_url, body, headers=signed_headers)
            try:
                # use our backoff mechanism here, we have encountered 500s on puts to AWS signed urls
                response = with_retry(put_fn, retry_exceptions=[requests.exceptions.ConnectionError])
                _raise_for_status(response)

                # completed upload part to s3 successfully
                break

            except SynapseHTTPError as ex:
                if ex.response.status_code == 403 and retry < 1:
                    # we interpret this to mean our pre_signed url expired.
                    self._syn.logger.debug(
                        "The pre-signed upload URL for part {} has expired. "
                        "Refreshing urls and retrying.\n".format(part_number)
                    )

                    # we refresh all the urls and obtain this part's
                    # specific url for the retry
                    part_url, signed_headers = self._refresh_pre_signed_part_urls(
                        part_number,
                        part_url,
                    )

                else:
                    raise

        md5_hex = self._md5_fn(body, response)

        # now tell synapse that we uploaded that part successfully
        self._syn.restPUT(
            "/file/multipart/{upload_id}/add/{part_number}?partMD5Hex={md5}"
            .format(
                upload_id=self._upload_id,
                part_number=part_number,
                md5=md5_hex,
            ),
            requests_session=session,
            endpoint=self._syn.fileHandleEndpoint
        )

        # remove so future batch pre_signed url fetches will exclude this part
        with self._lock:
            del self._pre_signed_part_urls[part_number]

        return part_number, part_size
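
self._md5_fn above converts the uploaded part bytes into the hex digest reported back to Synapse. A minimal sketch of such a helper matching the (body, response) call shape above; the body of the function is an assumption:

import hashlib


def md5_fn(part_body, _response):
    """Illustrative MD5 helper for the (body, response) call above."""
    return hashlib.md5(part_body).hexdigest()
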
    def test_sts_external_storage_location(self):
        """Test creating and using an external STS storage location.
        A custom storage location is created with sts enabled,
        a file is uploaded directly via boto using STS credentials,
        a file handle is created for it, and then it is read directly
        via boto using STS read credentials.
        """
        bucket_name, _ = get_aws_env()
        _, folder, storage_location_id = self._configure_storage_location(
            sts_enabled=True)

        sts_read_creds = self.syn.get_sts_storage_token(folder['id'],
                                                        'read_only',
                                                        output_format='boto')
        sts_write_creds = self.syn.get_sts_storage_token(folder['id'],
                                                         'read_write',
                                                         output_format='boto')

        s3_read_client = boto3.client('s3', **sts_read_creds)
        s3_write_client = boto3.client('s3', **sts_write_creds)

        # put an object directly using our sts creds
        file_contents = 'saved using sts'
        temp_file = self._make_temp_file(contents=file_contents, suffix='.txt')

        remote_key = f"{folder.name}/sts_saved"

        # verify that the read credentials are in fact read only
        with pytest.raises(Exception) as ex_cm:
            s3_read_client.upload_file(
                Filename=temp_file.name,
                Bucket=bucket_name,
                Key=remote_key,
            )
        assert 'Access Denied' in str(ex_cm.value)

        # now create a file directly in s3 using our STS creds
        s3_write_client.upload_file(
            Filename=temp_file.name,
            Bucket=bucket_name,
            Key=remote_key,
            ExtraArgs={'ACL': 'bucket-owner-full-control'},
        )

        # now read the file back using our read credentials.
        # S3 reads can lag behind writes, so retry until the object is
        # available before we try to create the file handle
        with_retry(lambda: s3_read_client.get_object(Bucket=bucket_name,
                                                     Key=remote_key),
                   retry_exceptions=[s3_read_client.exceptions.NoSuchKey])

        # create an external file handle so we can read it via synapse
        file_handle = self.syn.create_external_s3_file_handle(
            bucket_name,
            remote_key,
            temp_file.name,
            storage_location_id=storage_location_id,
        )
        file = File(parentId=folder['id'], dataFileHandleId=file_handle['id'])
        file_entity = self.syn.store(file)

        # now should be able to retrieve the file via synapse
        retrieved_file_entity = self.syn.get(file_entity['id'])
        with open(retrieved_file_entity.path, 'r') as f:
            assert file_contents == f.read()