Beispiel #1
0
def convert_to_response_dict(http_response, operation_model):
    """Build a plain response dictionary from an HTTP response object.

    :type http_response: botocore.vendored.requests.model.Response
    :param http_response: The HTTP response from an AWS service request.

    :param operation_model: Object whose ``has_streaming_output`` attribute
        decides whether a sub-300 response body is wrapped in a
        ``StreamingBody`` instead of being read via ``content``.

    :rtype: dict
    :return: A response dictionary which will contain the following keys:
        * headers (dict)
        * status_code (int)
        * body (the response content, or a ``StreamingBody`` over the raw
          stream for streaming operations)

    """
    headers = http_response.headers
    status_code = http_response.status_code
    # Responses with a status of 300 or above are never streamed; their
    # content is always taken from ``http_response.content``.
    if status_code < 300 and operation_model.has_streaming_output:
        body = StreamingBody(http_response.raw, headers.get('content-length'))
    else:
        body = http_response.content
    return {
        'headers': headers,
        'status_code': status_code,
        'body': body,
    }
Beispiel #2
0
def test_s3_csv_column_types(mock_client, csv_content):
    """Check the encoding and column definitions reported for a mocked S3 CSV."""
    content_size = len(csv_content)
    mock_client().head_object.return_value = {"ContentType": "text/csv"}
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": content_size,
        "Body": StreamingBody(io.BytesIO(csv_content), content_size),
    }

    expected_columns = [
        {
            "header_name": "col1",
            "column_name": "col1",
            "data_type": PostgresDataTypes.TEXT,
            "sample_data": ["row1-col1", "row2\ncol1"],
        },
        {
            "header_name": "col2",
            "column_name": "col2",
            "data_type": PostgresDataTypes.BIGINT,
            "sample_data": ["1", "2"],
        },
    ]

    file_info = get_s3_csv_file_info("/a/path.csv")

    assert file_info["encoding"] == "utf-8-sig"
    assert file_info["column_definitions"] == expected_columns
Beispiel #3
0
def _cast_blob(
    definition: dict,
    value: typing.Any,
) -> typing.Union[bytes, StreamingBody]:
    """
    Cast a loaded value to the form a botocore blob takes in a response.

    Values that are already ``bytes`` are used as-is; anything else is
    converted with ``str(value).encode()``. When the definition has a truthy
    "streaming" entry, the bytes are wrapped in a StreamingBody that reads
    from an InternalStreamer, so the result can be consumed like a streamed
    response body.

    :param definition:
        Specification definition for the associated value to cast.
    :param value:
        A loaded value to be cast into its boto client response value.
    :return:
        Raw bytes for non-streaming blobs, otherwise a StreamingBody whose
        content length is the number of bytes.
    """
    raw = value if isinstance(value, bytes) else str(value).encode()

    if not definition.get("streaming"):
        return raw
    return StreamingBody(InternalStreamer(raw), len(raw))
Beispiel #4
0
    def test_reads_s3_config(self):
        """ClusterConfig.from_s3 should return the parsed YAML served by a stubbed S3 client."""
        s3 = boto3.client("s3")
        bucket, key = "config_bucket", "config_key"

        with open(TEST_PATH_CONFIG_CLUSTER, "r") as config_file:
            raw_yaml = config_file.read()
        expected = yaml.safe_load(raw_yaml)

        # The stubbed body carries the same file content, UTF-8 encoded.
        payload = bytes(raw_yaml, encoding="utf-8")
        with Stubber(s3) as stubber:
            stubber.add_response(
                "get_object",
                {"Body": StreamingBody(BytesIO(payload), len(payload))},
                {"Bucket": bucket, "Key": key},
            )
            actual = ClusterConfig.from_s3(
                bucket=bucket, key=key, s3_client=s3)

        assert expected == actual
Beispiel #5
0
    def test_get_object(self):
        """Cover the success, missing-argument and client-error paths of get_object."""
        bucket = "random-bucket"
        key = "hello/1.png"
        expected_json_file = {"hello": "world"}
        payload = json.dumps(expected_json_file).encode()

        stubber.add_response(
            "get_object",
            {
                "Body": StreamingBody(io.BytesIO(payload), len(payload)),
                "Metadata": {"height": "100", "width": "200"},
            },
            {"Bucket": bucket, "Key": key},
        )
        with stubber:
            response = self.s3.get_object(bucket, key)
            self.assertEqual(response, expected_json_file)

            # Any missing argument is expected to raise the same error.
            for bad_bucket, bad_key in ((bucket, None), (None, None), (None, key)):
                with self.assertRaisesRegex(Exception, "Bucket, key are required"):
                    self.s3.get_object(bad_bucket, bad_key)

        stubber.add_client_error("get_object")
        with stubber, self.assertRaises(ClientError):
            self.s3.get_object(bucket, key)
    def test_ship(self):
        """Every rpush issued by ship() should target "logstash" with the merged grok fields."""
        parser = Mock(Parser)
        path_groks = {
            "timestamp": datetime.now().isoformat(),
            "message": "Hello",
            "level": "INFO",
        }
        log_groks = {"cluster": "foo12345", "node": "abc1234"}

        parser.parse_log.return_value = path_groks
        self.parser_manager.get_parser.return_value = parser, log_groks
        self.s3_client.add_response(
            method="get_object",
            service_response={"Body": StreamingBody(io.BytesIO(b"HELLO"), 5)},
            expected_params={"Bucket": ANY, "Key": ANY},
        )
        self.s3_client.activate()

        self.under_test.ship("foo", "bar.log")

        # Values parsed from the log take precedence over the path-level ones.
        expected = {**log_groks, **path_groks}

        # NOTE(review): this loop asserts nothing if rpush was never called.
        for recorded_call in self.redis_client.rpush.call_args_list:
            queue_name, serialized = recorded_call[0]
            self.assertEqual(queue_name, "logstash")
            self.assertEqual(json.loads(serialized), expected)
 def test_download_local_file(self, mock_client, request_client, published):
     """Downloading a local source link streams the S3 object and records the download."""
     file_bytes = b'This is a test file'
     dataset = factories.DataSetFactory.create(
         published=published, user_access_type='REQUIRES_AUTHENTICATION')
     link = factories.SourceLinkFactory(
         id='158776ec-5c40-4c58-ba7c-a3425905ec45',
         dataset=dataset,
         link_type=SourceLink.TYPE_LOCAL,
         url='s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt',
     )
     events_before = EventLog.objects.count()
     downloads_before = dataset.number_of_downloads
     mock_client().get_object.return_value = {
         'ContentType': 'text/plain',
         'Body': StreamingBody(io.BytesIO(file_bytes), len(file_bytes)),
     }

     download_url = reverse(
         'datasets:dataset_source_link_download',
         kwargs={'dataset_uuid': dataset.id, 'source_link_id': link.id},
     )
     response = request_client.get(download_url)

     assert response.status_code == 200
     assert list(response.streaming_content)[0] == b'This is a test file'
     mock_client().get_object.assert_called_with(
         Bucket=settings.AWS_UPLOADS_BUCKET, Key=link.url)

     # Exactly one new event log entry and one extra download are expected.
     assert EventLog.objects.count() == events_before + 1
     latest_event_type = EventLog.objects.latest().event_type
     assert latest_event_type == EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD
     refreshed = DataSet.objects.get(pk=dataset.id)
     assert refreshed.number_of_downloads == downloads_before + 1
Beispiel #8
0
    def test_storage(self):
        """Save, read back and delete an object through S3Storage on a stubbed client."""
        region = 'ap-southeast-1'
        bucket_name = 'mock-bucket-' + uuid.uuid4().hex
        s3 = boto3.resource('s3', region_name=region)
        stubber = Stubber(s3.meta.client)
        storage = S3Storage(S3Bucket(region, s3.Bucket(bucket_name)))

        key = 'mock-key'
        mock_content = b''
        # There is no raw stream behind the body; read/close are mocked.
        body = StreamingBody(None, len(mock_content))
        body.read = Mock(return_value=mock_content)
        body.close = Mock()

        queued_responses = (
            ('put_object', {}),
            ('get_object', {'Body': body}),
            ('delete_object', {}),
        )
        for operation, service_response in queued_responses:
            stubber.add_response(operation, service_response)
        stubber.activate()

        stored = storage.save(key, mock_content)
        self.assertEqual(stored.content, mock_content)
        stored.delete()
Beispiel #9
0
def get_object_response(data):
    """Return a dict with an HTTP 200 ``ResponseMetadata`` and *data* as a streaming ``Body``.

    ``data`` is wrapped in ``io.BytesIO`` and its length is used as the
    content length of the ``StreamingBody``.
    """
    return {
        'ResponseMetadata': {'HTTPStatusCode': 200},
        'Body': StreamingBody(io.BytesIO(data), len(data)),
    }
Beispiel #10
0
def test_streaming_s3_objects():
    """read_csv should accept botocore StreamingBody objects (GH17135)."""
    # botocore gained iteration support in 1.10.47, so the test is skipped
    # when an older version (or no botocore at all) is installed.
    pytest.importorskip("botocore", minversion="1.10.47")
    from botocore.response import StreamingBody

    payloads = (b"foo,bar,baz\n1,2,3\n4,5,6\n", b"just,the,header\n")
    for payload in payloads:
        read_csv(StreamingBody(BytesIO(payload), content_length=len(payload)))
Beispiel #11
0
    def should_return_when_empty_file_checksum_matches(
            self, get_object_mock: MagicMock) -> None:
        """Validating an empty body against EMPTY_FILE_MULTIHASH should not raise."""
        get_object_mock.return_value = {"Body": StreamingBody(BytesIO(), 0)}
        patched_target = (
            "backend.check_files_checksums.utils.processing_assets_model_with_meta"
        )

        with patch(patched_target):
            validator = ChecksumValidator(
                any_table_name(), MockValidationResultFactory(), self.logger)
            validator.validate_url_multihash(any_s3_url(), EMPTY_FILE_MULTIHASH)
Beispiel #12
0
def gen_s3_object_content(
        content: Union[Dict[str, Any], str]) -> StreamingBody:
    """Wrap a string or dict as an S3 object body.

    Args:
        content: S3 object body. A dict is serialized with ``json.dumps``
            (using ``json_serial`` as the ``default`` hook) before encoding.

    Returns:
        A StreamingBody over the encoded content, with the content length
        set to the number of encoded bytes.

    """
    text = (json.dumps(content, default=json_serial)
            if isinstance(content, dict) else content)
    payload = text.encode()
    return StreamingBody(io.BytesIO(payload), len(payload))
 def setUp(self):
     """Create the spider, a request/response pair and the get_object replies built from them."""

     def as_get_object(payload):
         # Shape raw bytes like a dict with a streaming 'Body' entry.
         return {'Body': StreamingBody(io.BytesIO(payload), len(payload))}

     self.crawler = get_crawler(Spider)
     self.spider = self.crawler._create_spider(self.spider_name)
     self.tmpdir = tempfile.mkdtemp()
     self.request = Request('http://www.example.com',
                            headers={'User-Agent': 'test'})
     self.response = Response('http://www.example.com',
                              headers={'Content-Type': 'text/html'},
                              body=b'test body',
                              status=202)
     self.crawler.stats.open_spider(self.spider)

     request, response = self.request, self.response
     meta = {
         'url': request.url,
         'method': request.method,
         'status': response.status,
         'response_url': response.url,
         'timestamp': time.time(),
     }
     self.cached_response = {
         'meta': meta,
         'response_headers': headers_dict_to_raw(response.headers),
         'response_body': response.body,
         'request_headers': headers_dict_to_raw(request.headers),
         'request_body': request.body
     }

     # Two variants of the same cached entry: plain pickle and gzipped pickle.
     pickled = pickle.dumps(self.cached_response)
     self.pickled_cached_response = pickled
     self.get_object_response = as_get_object(pickled)

     gzipped = gzip.compress(pickled)
     self.gzipped_pickled_cached_response = gzipped
     self.get_object_response_gziped = as_get_object(gzipped)
Beispiel #14
0
def test_read_only():
    """Save settings, load them back via read_only, and check they end up in the cache."""
    encrypt_key_arn = 'arn:aws:kms:region:account_id:key/guid'
    bucket_name, file_name = 'bucket', 'conf.json'
    settings = {'setting_1': 'foo'}
    serialized = json.dumps(settings)
    payload = serialized.encode('utf-8')

    s3 = botocore.session.get_session().create_client('s3')
    conn = conf.conn(encrypt_key_arn, client=s3)

    expected_put_response = {
        'Expiration': 'string',
        'ETag': 'string',
        'ServerSideEncryption': 'AES256',
        'VersionId': 'string',
        'SSECustomerAlgorithm': 'string',
        'SSECustomerKeyMD5': 'string',
        'SSEKMSKeyId': 'string',
        'RequestCharged': 'requester'
    }
    put_parameters = {
        'Body': serialized,
        'Bucket': bucket_name,
        'Key': file_name,
        'SSEKMSKeyId': 'arn:aws:kms:region:account_id:key/guid',
        'ServerSideEncryption': 'aws:kms'
    }

    # The serialized settings are 20 bytes long, which is the declared length.
    expected_get_response = {
        'Body': StreamingBody(raw_stream=BytesIO(payload),
                              content_length=len(payload))
    }
    get_parameters = {'Bucket': bucket_name, 'Key': file_name}

    with Stubber(s3) as stubber:
        # Writing: the stub only accepts the exact put parameters above.
        stubber.add_response('put_object', expected_put_response,
                             put_parameters)
        assert conf.save(conn, bucket_name, file_name, settings) == settings

        # Reading: the stubbed body must decode back to the same settings.
        stubber.add_response('get_object', expected_get_response,
                             get_parameters)
        assert conf.read_only(conn, bucket_name, file_name) == settings

        # The read should have left exactly one entry, keyed by (bucket, key).
        assert conf.read_only_cache.currsize == 1
        assert conf.read_only_cache[(bucket_name, file_name)] == settings
def retrieve_setup(arntask, creds, device_value, completed_value,
                   results_json):
    """Bundle the test inputs with a results dict whose Body streams *results_json*.

    The body wraps ``StringIO(results_json)`` and declares
    ``len(results_json)`` as its content length.

    Returns the tuple ``(arntask, creds, completed_value, device_value,
    results_dict)``; note that ``completed_value`` comes before
    ``device_value`` here, unlike in the parameter list.
    """
    results_dict = {
        'ResponseMetadata': {
            'RequestId': 'CF4CAA48CC18836C',
            'HTTPHeaders': {},
        },
        'Body': StreamingBody(StringIO(results_json), len(results_json)),
    }
    return arntask, creds, completed_value, device_value, results_dict
Beispiel #16
0
 def test_load_data_dump_to_dict_object(self):
     """The helper should return the dict encoded in the stubbed get_object body."""
     bucket, key = "codTest", "key"
     expected = {"test": 1}
     expected_json = json.dumps(expected)

     body = StreamingBody(io.BytesIO(expected_json.encode('utf-8')),
                          content_length=len(expected_json))
     self.stubber.add_response('get_object', {'Body': body},
                               dict(Bucket=bucket, Key=key))
     self.stubber.activate()

     out = self.helper.load_data_dump_to_dict_object(s3_bucket=bucket,
                                                     file_key=key)
     self.assertEqual(expected, out)
Beispiel #17
0
    def should_raise_exception_when_checksum_does_not_match(
        self, get_object_mock: MagicMock
    ) -> None:
        """An empty body checked against an all-zero digest must raise ChecksumMismatchError."""
        get_object_mock.return_value = {"Body": StreamingBody(BytesIO(), 0)}

        checksum = "0" * 64
        multihash = f"{SHA2_256:x}{SHA256_CHECKSUM_BYTE_COUNT:x}{checksum}"
        patched_target = (
            "backend.check_files_checksums.utils.processing_assets_model_with_meta"
        )

        with raises(ChecksumMismatchError):
            with patch(patched_target):
                validator = ChecksumValidator(
                    any_table_name(), MockValidationResultFactory(), self.logger
                )
                validator.validate_url_multihash(any_s3_url(), multihash)
Beispiel #18
0
    def should_return_when_file_checksum_matches(self, get_object_mock: MagicMock) -> None:
        """Validation should not raise for a body one byte longer than CHUNK_SIZE with its matching digest."""
        file_contents = b"x" * (CHUNK_SIZE + 1)
        body = StreamingBody(BytesIO(file_contents), len(file_contents))
        get_object_mock.return_value = {"Body": body}

        digest = "c6d8e9905300876046729949cc95c2385221270d389176f7234fe7ac00c4e430"
        multihash = f"{SHA2_256:x}{SHA256_CHECKSUM_BYTE_COUNT:x}" + digest

        with patch("backend.check_files_checksums.utils.processing_assets_model_with_meta"):
            validator = ChecksumValidator(
                any_table_name(), MockValidationResultFactory(), self.logger
            )
            validator.validate_url_multihash(any_s3_url(), multihash)
Beispiel #19
0
def _build_response(content, version, content_type):
    """Build a response dict with "Content", "ConfigurationVersion" and "ContentType".

    ``content`` is serialized according to ``content_type``: JSON for
    "application/json", YAML for "application/x-yaml", and used as-is for
    anything else. The UTF-8 encoded result is wrapped in a StreamingBody.
    """
    if content_type == "application/json":
        serialized = json.dumps(content)
    elif content_type == "application/x-yaml":
        serialized = str(yaml.dump(content))
    else:
        serialized = content
    content_text = serialized.encode("utf-8")

    body = StreamingBody(io.BytesIO(bytes(content_text)), len(content_text))
    return {
        "Content": body,
        "ConfigurationVersion": version,
        "ContentType": content_type,
    }
Beispiel #20
0
    def test_get_policy_content_s3(self):
        """get_unwrapped_policy should return the policy read from the mocked S3 object."""
        # setup: point the policy content at S3 and mock the object body
        self.sut.policy_content = {"s3": {"bucket": "my_bucket", "key": "my_key"}}
        payload = json.dumps(self.policy).encode("utf-8")
        self.hub_client_mock.get_object.return_value = {
            "Body": StreamingBody(io.BytesIO(payload), len(payload))
        }

        # exercise
        actual_result = self.sut.get_unwrapped_policy()

        # verify
        self.assertEqual(self.policy, actual_result)
Beispiel #21
0
def gen_s3_object_content(content):
    """Wrap a string or dict as an S3 object body.

    Args:
        content (Union[str, Dict[str, Any]]): S3 object body. A dict is
            serialized with ``json.dumps`` (using ``json_serial`` as the
            ``default`` hook) before encoding.

    Returns:
        botocore.response.StreamingBody Used in the Body of a
            s3.get_object response.

    """
    text = (json.dumps(content, default=json_serial)
            if isinstance(content, dict) else content)
    payload = text.encode()
    return StreamingBody(io.BytesIO(payload), len(payload))
def mock_get_object_response(raw_body: str) -> Dict[str, Any]:
    """
    Build a stand-in for the s3 client get_object() response.

    See https://gist.github.com/grantcooksey/132ddc85274a50b94b821302649f9d7b

    Parameters
    ----------
        raw_body:
            Text to serve from the 'Body' field; it is UTF-8 encoded first.

    Returns
    -------
        A dict with a single 'Body' key holding a StreamingBody.
    """
    payload = raw_body.encode("utf-8")
    return {"Body": StreamingBody(io.BytesIO(payload), len(payload))}
Beispiel #23
0
def test_media_s3_valid_file(mocker, client):
    """The uploaded-media view should stream the S3 object and report its length."""
    file_content = b"some file content stored on s3"
    size = len(file_content)
    mock_client = mocker.patch(
        "dataworkspace.apps.core.boto3_client.boto3.client")
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": size,
        "Body": StreamingBody(io.BytesIO(file_content), size),
    }

    url = reverse("uploaded-media") + "?path=uploaded-media/test.txt"
    response = client.get(url)

    assert response.status_code == 200
    first_chunk = list(response.streaming_content)[0]
    assert first_chunk == b"some file content stored on s3"
    assert response["content-length"] == str(
        len(b"some file content stored on s3"))
Beispiel #24
0
 def test_preview_csv(self, mock_client):
     """get_preview_data should return the header names and the two expected rows."""
     link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.csv")
     csv_content = b"col1,col2\nrow1-col1, row1-col2\nrow2-col1, row2-col2\ntrailing"
     size = len(csv_content)
     mock_client().head_object.return_value = {"ContentType": "text/csv"}
     mock_client().get_object.return_value = {
         "ContentType": "text/plain",
         "ContentLength": size,
         "Body": StreamingBody(io.BytesIO(csv_content), size),
     }

     # The final "trailing" line is not part of the expected rows.
     expected_headers = ["col1", "col2"]
     expected_rows = [
         OrderedDict([("col1", "row1-col1"), ("col2", " row1-col2")]),
         OrderedDict([("col1", "row2-col1"), ("col2", " row2-col2")]),
     ]
     assert link.get_preview_data() == (expected_headers, expected_rows)
def test_lambda_function(FunctionName,Payload):
    """Invoke a Lambda function and return its decoded output.

    :param FunctionName: Passed through as ``FunctionName`` to ``invoke``.
    :param Payload: Passed through as ``Payload`` to ``invoke``.
    :return: ``{'Status': True, 'lambda_output': <str>}`` when the response
        reports HTTP status 200, where the output is the response payload
        decoded as UTF-8; ``{'Status': False}`` when the call raises
        ``ClientError`` or reports any other HTTP status.
    """
    try:
        client = boto3.client('lambda')
        response = client.invoke(
            FunctionName=FunctionName,
            Payload=Payload
        )
    except ClientError as e:
        print("Unexpected error: {}".format(e))
        return {'Status': False}

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        return {'Status': False}

    # The 'Payload' returned by invoke is already a readable streaming body,
    # so it is read directly. Re-wrapping it required a 'content-length'
    # response header and raised KeyError whenever that header was absent.
    lambda_output = response['Payload'].read().decode('utf-8')
    return {'Status': True, 'lambda_output': lambda_output}
def incomplete_read_error(lambda_function,
                          runtime_variables,
                          environment_variables,
                          file_list,
                          wrangler_name,
                          expected_message="Incomplete Lambda response"):
    """
    Trigger an incomplete read error in a given wrangler and check its message.

    Valid input file(s) are uploaded first so that the function runs as far
    as the lambda invoke. The invoke is mocked to return a fixed payload
    whose declared content length (1) does not match its actual size.

    :param lambda_function: Lambda function to test - Type: Function
    :param runtime_variables: Runtime variables to send to function - Type: Dict
    :param environment_variables: Environment Vars to send to function - Type: Dict
    :param file_list: List of input files for the function - Type: List
    :param wrangler_name: Wrangler that is being tested,
            used in mocking boto3. - Type: String
    :param expected_message: - Error message we are expecting. - Type: String
            (default to match current exception handling)
    :return Test Pass/Fail
    """

    bucket_name = environment_variables["bucket_name"]
    client = create_bucket(bucket_name)
    upload_files(client, bucket_name, file_list)

    with mock.patch(wrangler_name + ".boto3.client") as mock_client:
        fake_lambda_client = mock.Mock()
        mock_client.return_value = fake_lambda_client
        fake_lambda_client.invoke.return_value = {
            "Payload": StreamingBody(io.BytesIO(b'{"Bad Bytes": 999}'), 1)
        }

        with pytest.raises(exception_classes.LambdaFailure) as exc_info:
            if environment_variables:
                # Run the handler with the supplied environment patched in.
                with mock.patch.dict(lambda_function.os.environ,
                                     environment_variables):
                    lambda_function.lambda_handler(runtime_variables,
                                                   context_object)
            else:
                lambda_function.lambda_handler(runtime_variables,
                                               context_object)

        assert expected_message in exc_info.value.error_message
    def get_response_from_file(self, file_name):
        """Read *file_name* as text and return it as ``{'Body': StreamingBody}``.

        The text is encoded with ``str.encode()`` and the content length is
        the number of encoded bytes.
        """
        with open(file_name, 'r') as handle:
            payload = handle.read().encode()

        return {'Body': StreamingBody(io.BytesIO(payload), len(payload))}
Beispiel #28
0
def test_make_boto_response_json_serializable_succeeds():
    """Datetimes should become strings and a StreamingBody its text; other values pass through."""
    text, integer, real = "hello world", 1, 1.0
    moment = datetime(2016, 6, 23)
    sequence = [text, integer, real]

    body_bytes = text.encode("utf-8")
    streaming_body = StreamingBody(io.BytesIO(body_bytes), len(body_bytes))

    boto_response = {
        "str": text,
        "int": integer,
        "float": real,
        "bool": True,
        "None": None,
        "datetime": moment,
        "list": sequence,
        "dict": {
            "str": integer,
            "int": integer,
            "datetime": moment,
            "StreamingBody": streaming_body,
        },
    }

    # Only the datetime and StreamingBody entries are expected to change.
    expected_output = {
        "str": text,
        "int": integer,
        "float": real,
        "bool": True,
        "None": None,
        "datetime": str(moment),
        "list": sequence,
        "dict": {
            "str": integer,
            "int": integer,
            "datetime": str(moment),
            "StreamingBody": text,
        },
    }

    assert make_boto_response_json_serializable(boto_response) == expected_output
def test_bad_json(appconfig_stub, mocker):
    """update_config should raise ValueError when the configuration body is malformed JSON."""
    client, stub, _session = appconfig_stub
    # The trailing comma makes this invalid JSON.
    malformed = """{"broken": "json",}""".encode("utf-8")

    _add_start_stub(stub)
    broken_response = _build_response({}, "application/json")
    # Swap the well-formed body for the malformed one.
    broken_response["Configuration"] = StreamingBody(
        io.BytesIO(bytes(malformed)), len(malformed))
    stub.add_response(
        "get_latest_configuration",
        broken_response,
        _build_request(),
    )
    mocker.patch.object(boto3, "client", return_value=client)

    helper = AppConfigHelper(
        "AppConfig-App", "AppConfig-Env", "AppConfig-Profile", 15)
    with pytest.raises(ValueError):
        helper.update_config()
def _build_response(content, content_type, next_token="token5678", poll=30):
    """Build a response dict carrying a streamed "Configuration" and polling fields.

    ``content`` is serialized according to ``content_type``: JSON for
    "application/json", YAML for "application/x-yaml", and used as-is for
    anything else. The UTF-8 encoded result is wrapped in a StreamingBody.
    ``next_token`` and ``poll`` fill "NextPollConfigurationToken" and
    "NextPollIntervalInSeconds" respectively.
    """
    if content_type == "application/json":
        serialized = json.dumps(content)
    elif content_type == "application/x-yaml":
        serialized = str(yaml.dump(content))
    else:
        serialized = content
    content_text = serialized.encode("utf-8")

    body = StreamingBody(io.BytesIO(bytes(content_text)), len(content_text))
    return {
        "Configuration": body,
        "ContentType": content_type,
        "NextPollConfigurationToken": next_token,
        "NextPollIntervalInSeconds": poll,
    }