Code example #1
 def test_master_schema(self, capsys, user_schema, min_datetime,
                        ordered_file_infos, file_schemas, expected_schema,
                        log_expected, error_expected):
     # Stub the parser class so each file "infers" one of the schemas supplied
     # by the test parameters; the time-ordered file listing is stubbed below
     # so the stream iterates over the prepared file infos.
     file_format_parser_mock = MagicMock(return_value=MagicMock(
         get_inferred_schema=MagicMock(side_effect=file_schemas)))
     with patch.object(IncrementalFileStreamS3, "fileformatparser_class",
                       file_format_parser_mock):
         with patch.object(IncrementalFileStreamS3,
                           "get_time_ordered_file_infos",
                           MagicMock(return_value=ordered_file_infos)):
             stream_instance = IncrementalFileStreamS3(
                 dataset="dummy",
                 provider={},
                 format={"filetype": "csv"},
                 schema=user_schema,
                 path_pattern="**/prefix*.csv")
             # Either the schema conflict raises, or the merged master schema
             # matches and the datatype-mismatch warning (if any) was emitted.
             if error_expected:
                 with pytest.raises(RuntimeError):
                     stream_instance._get_master_schema(
                         min_datetime=min_datetime)
             else:
                 assert stream_instance._get_master_schema(
                     min_datetime=min_datetime) == expected_schema
                 if log_expected:
                     captured = capsys.readouterr()
                     assert "Detected mismatched datatype" in captured.out
Code example #2
 def test_fileformatparser_map(self):
     stream_instance = IncrementalFileStreamS3(
         dataset="dummy",
         provider={"bucket": "test-test"},
         format={},
         path_pattern="**/prefix*.csv")
     assert stream_instance.fileformatparser_map
Code example #3
 def test_get_json_schema(self):
     stream_instance = IncrementalFileStreamS3(
         dataset="dummy",
         provider={},
         format={"filetype": "csv"},
         schema={},
         path_pattern="**/prefix*.csv")
     assert stream_instance.get_json_schema() == {
         "properties": {
             "_ab_additional_properties": {
                 "type": "object"
             },
             "_ab_source_file_last_modified": {
                 "format": "date-time",
                 "type": "string"
             },
             "_ab_source_file_url": {
                 "type": "string"
             },
             "column_A": {
                 "type": ["null", "string"]
             },
             "column_B": {
                 "type": ["null", "integer"]
             },
             "column_C": {
                 "type": ["null", "boolean"]
             },
         },
         "type": "object",
     }
Code example #4
 def test_fileformatparser_class(self, file_type, error_expected):
     stream_instance = IncrementalFileStreamS3(
         dataset="dummy",
         provider={},
         format={"filetype": file_type},
         schema={},
         path_pattern="**/prefix*.csv")
     if error_expected:
         with pytest.raises(RuntimeError):
             _ = stream_instance.fileformatparser_class
     else:
         assert stream_instance.fileformatparser_class
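
This test, like examples #1 and #7, takes its arguments from a pytest parametrization that is not part of the excerpt. A hedged sketch of what the missing decorator could look like, with hypothetical file types and expectations rather than the original parameter sets:

    @pytest.mark.parametrize(
        "file_type, error_expected",
        [
            ("csv", False),          # a supported format resolves to a parser class
            ("unsupported", True),   # an unknown format is expected to raise RuntimeError
        ],
    )
    def test_fileformatparser_class(self, file_type, error_expected):
        ...
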
Code example #5
 def test_get_schema_map(self):
     stream_instance = IncrementalFileStreamS3(
         dataset="dummy",
         provider={},
         format={"filetype": "csv"},
         schema={},
         path_pattern="**/prefix*.csv")
     assert stream_instance._get_schema_map() == {
         "_ab_additional_properties": "object",
         "_ab_source_file_last_modified": "string",
         "_ab_source_file_url": "string",
         "column_A": "string",
         "column_B": "integer",
         "column_C": "boolean",
     }
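
Comparing this expected map with the JSON schema in example #3 suggests that the schema map is simply the schema's properties with the nullable unions collapsed to a single type name. A minimal sketch of that collapsing step, assuming this is all the mapping does (the real `_get_schema_map` implementation is not shown here; `collapse_nullable_types` is a hypothetical helper):

    def collapse_nullable_types(json_schema: dict) -> dict:
        """Reduce ["null", "<type>"] unions to the bare type name."""
        schema_map = {}
        for column, spec in json_schema["properties"].items():
            type_ = spec["type"]
            if isinstance(type_, list):
                # keep the concrete type and drop the "null" marker
                type_ = next(t for t in type_ if t != "null")
            schema_map[column] = type_
        return schema_map

Applied to the schema from example #3, this reproduces the map asserted above.
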
Code example #6
    def test_incremental_read(self):
        stream_instance = IncrementalFileStreamS3(
            dataset="dummy",
            provider={"bucket": "test-test"},
            format={},
            path_pattern="**/prefix*.csv")
        # The mocked bucket listing yields no files, so no slice produces records.
        stream_instance._list_bucket = MagicMock()

        records = []
        slices = stream_instance.stream_slices(sync_mode=SyncMode.incremental)

        for stream_slice in slices:
            records.extend(
                list(
                    stream_instance.read_records(
                        stream_slice=stream_slice,
                        sync_mode=SyncMode.incremental)))

        assert not records
Code example #7
    def test_filepath_iterator(self, bucket, path_prefix, list_v2_objects,
                               expected_file_info):
        provider = {
            "aws_access_key_id": "key_id",
            "aws_secret_access_key": "access_key"
        }
        # Patch the S3 client factory so list_objects_v2 returns the
        # parametrized response pages instead of calling AWS.
        s3_client_mock = MagicMock(return_value=MagicMock(
            list_objects_v2=MagicMock(side_effect=list_v2_objects)))
        with patch("source_s3.stream.make_s3_client", s3_client_mock):
            stream_instance = IncrementalFileStreamS3(
                dataset="dummy",
                provider={
                    "bucket": bucket,
                    "path_prefix": path_prefix,
                    **provider
                },
                format={},
                path_pattern="**/prefix*.png",
            )
            expected_info = iter(expected_file_info)

            for file_info in stream_instance.filepath_iterator():
                assert file_info == next(expected_info)
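
The `list_v2_objects` parameter holds the pages that the mocked `list_objects_v2` call returns one after another via `side_effect`. A sketch of one such page, following the shape of the S3 ListObjectsV2 response; the keys shown are standard, but the concrete values are illustrative and not taken from the original test data:

    import datetime

    list_v2_objects = [
        {
            "Contents": [
                {
                    "Key": "subdir/prefix-test.png",
                    "Size": 1024,
                    "LastModified": datetime.datetime(2022, 1, 1),
                },
            ],
            "IsTruncated": False,  # a single page, no continuation token needed
        },
    ]
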
Code example #8
    def test_read(self):
        stream_instance = IncrementalFileStreamS3(
            dataset="dummy",
            provider={"bucket": "test-test"},
            format={},
            path_pattern="**/prefix*.csv")
        # As in example #6, the mocked bucket listing yields no files, so nothing
        # is read even when a stream_state is supplied.
        stream_instance._list_bucket = MagicMock()

        records = []
        slices = stream_instance.stream_slices(sync_mode=SyncMode.full_refresh)
        for stream_slice in slices:
            records.extend(
                list(
                    stream_instance.read_records(
                        stream_slice=stream_slice,
                        sync_mode=SyncMode.full_refresh,
                        stream_state={
                            "_ab_source_file_last_modified":
                            "1999-01-01T00:00:00+0000"
                        },
                    )))

        assert not records