Exemple #1
0
def test_bulk_sync_successful_long_response(stream_config, stream_api):
    stream: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)
    with requests_mock.Mocker() as m:
        job_id = _prepare_mock(m, stream)
        m.register_uri(
            "GET",
            stream.path() + f"/{job_id}",
            [
                {
                    "json": {
                        "state": "UploadComplete",
                        "id": job_id
                    }
                },
                {
                    "json": {
                        "state": "InProgress",
                        "id": job_id
                    }
                },
                {
                    "json": {
                        "state": "JobComplete",
                        "id": job_id
                    }
                },
            ],
        )
        assert _get_result_id(stream) == 1
Exemple #2
0
def test_stream_contains_unsupported_properties_by_bulk(
        stream_config, stream_api_v2):
    """
    Stream `Account` contains compound field such as BillingAddress, which is not supported by BULK API (csv),
    in that case REST API stream will be used for it.
    """
    stream_name = "Account"
    stream = generate_stream(stream_name, stream_config, stream_api_v2)
    assert not isinstance(stream, BulkSalesforceStream)
Exemple #3
0
def test_stream_has_state_rest_api_should_be_used(stream_config, stream_api):
    """
    Stream `ActiveFeatureLicenseMetric` has state, in that case REST API stream will be used for it.
    """
    stream_name = "ActiveFeatureLicenseMetric"
    state = {stream_name: {"SystemModstamp": "2122-08-22T05:08:29.000Z"}}
    stream = generate_stream(stream_name,
                             stream_config,
                             stream_api,
                             state=state)
    assert not isinstance(stream, BulkSalesforceStream)
Exemple #4
0
def test_pagination_rest(stream_config, stream_api):
    stream_name = "ActiveFeatureLicenseMetric"
    state = {stream_name: {"SystemModstamp": "2122-08-22T05:08:29.000Z"}}

    stream: SalesforceStream = generate_stream(stream_name,
                                               stream_config,
                                               stream_api,
                                               state=state)
    stream.DEFAULT_WAIT_TIMEOUT_SECONDS = 6  # maximum wait timeout will be 6 seconds
    next_page_url = "/services/data/v52.0/query/012345"
    with requests_mock.Mocker() as m:
        resp_1 = {
            "done":
            False,
            "totalSize":
            4,
            "nextRecordsUrl":
            next_page_url,
            "records": [
                {
                    "ID": 1,
                    "LastModifiedDate": "2021-11-15",
                },
                {
                    "ID": 2,
                    "LastModifiedDate": "2021-11-16",
                },
            ],
        }
        resp_2 = {
            "done":
            True,
            "totalSize":
            4,
            "records": [
                {
                    "ID": 3,
                    "LastModifiedDate": "2021-11-17",
                },
                {
                    "ID": 4,
                    "LastModifiedDate": "2021-11-18",
                },
            ],
        }

        m.register_uri("GET", stream.path(), json=resp_1)
        m.register_uri("GET", next_page_url, json=resp_2)

        records = [
            record
            for record in stream.read_records(sync_mode=SyncMode.full_refresh)
        ]
        assert len(records) == 4
Exemple #5
0
def test_bulk_sync_creation_failed(stream_config, stream_api):
    stream: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)
    with requests_mock.Mocker() as m:
        m.register_uri("POST",
                       stream.path(),
                       status_code=400,
                       json=[{
                           "message": "test_error"
                       }])
        with pytest.raises(HTTPError) as err:
            next(stream.read_records(sync_mode=SyncMode.full_refresh))
        assert err.value.response.json()[0]["message"] == "test_error"
Exemple #6
0
def test_bulk_sync_successful_retry(stream_config, stream_api):
    stream: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)
    stream.DEFAULT_WAIT_TIMEOUT_SECONDS = 6  # maximum wait timeout will be 6 seconds

    with requests_mock.Mocker() as m:
        job_id = _prepare_mock(m, stream)
        # 2 failed attempts, 3rd one should be successful
        states = [{"json": {"state": "InProgress", "id": job_id}}] * 17
        states.append({"json": {"state": "JobComplete", "id": job_id}})
        # raise Exception(states)
        m.register_uri("GET", stream.path() + f"/{job_id}", states)
        assert _get_result_id(stream) == 1
Exemple #7
0
def test_bulk_sync_failed_retry(stream_config, stream_api):
    stream: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)
    stream.DEFAULT_WAIT_TIMEOUT_SECONDS = 6  # maximum wait timeout will be 6 seconds
    with requests_mock.Mocker() as m:
        job_id = _prepare_mock(m, stream)
        m.register_uri("GET",
                       stream.path() + f"/{job_id}",
                       json={
                           "state": "InProgress",
                           "id": job_id
                       })
        with pytest.raises(Exception) as err:
            next(stream.read_records(sync_mode=SyncMode.full_refresh))
        assert "stream using BULK API was failed" in str(err.value)
Exemple #8
0
def test_download_data_filter_null_bytes(stream_config, stream_api):
    job_full_url: str = "https://fase-account.salesforce.com/services/data/v52.0/jobs/query/7504W00000bkgnpQAA"
    stream: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)

    with requests_mock.Mocker() as m:
        m.register_uri("GET", f"{job_full_url}/results", content=b"\x00")
        res = list(
            stream.read_with_chunks(stream.download_data(url=job_full_url)))
        assert res == []

        m.register_uri(
            "GET",
            f"{job_full_url}/results",
            content=
            b'"Id","IsDeleted"\n\x00"0014W000027f6UwQAI","false"\n\x00\x00')
        res = list(
            stream.read_with_chunks(stream.download_data(url=job_full_url)))
        assert res == [{"Id": "0014W000027f6UwQAI", "IsDeleted": False}]
Exemple #9
0
def test_memory_download_data(stream_config, stream_api, n_records, first_size,
                              first_peak):
    job_full_url: str = "https://fase-account.salesforce.com/services/data/v52.0/jobs/query/7504W00000bkgnpQAA"
    stream: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)
    content = b'"Id","IsDeleted"'
    for _ in range(n_records):
        content += b'"0014W000027f6UwQAI","false"\n'

    with requests_mock.Mocker() as m:
        m.register_uri("GET", f"{job_full_url}/results", content=content)
        tracemalloc.start()
        for x in stream.read_with_chunks(
                stream.download_data(url=job_full_url)):
            pass
        fs, fp = tracemalloc.get_traced_memory()
        first_size_in_mb, first_peak_in_mb = fs / 1024**2, fp / 1024**2

        assert first_size_in_mb < first_size
        assert first_peak_in_mb < first_peak
Exemple #10
0
def test_convert_to_standard_instance(stream_config, stream_api):
    bulk_stream = generate_stream("Account", stream_config, stream_api)
    rest_stream = bulk_stream.get_standard_instance()
    assert isinstance(rest_stream, IncrementalSalesforceStream)
Exemple #11
0
def test_rate_limit_rest(stream_config, stream_api, configured_catalog, state):
    """
    Connector should stop the sync if one stream reached rate limit
    stream_1, stream_2, stream_3, ...
    While reading `stream_1` if 403 (Rate Limit) is received, it should finish that stream with success and stop the sync process.
    Next streams should not be executed.
    """

    stream_1: IncrementalSalesforceStream = generate_stream("Account",
                                                            stream_config,
                                                            stream_api,
                                                            state=state)
    stream_2: IncrementalSalesforceStream = generate_stream("Asset",
                                                            stream_config,
                                                            stream_api,
                                                            state=state)

    stream_1.state_checkpoint_interval = 3
    configure_request_params_mock(stream_1, stream_2)

    source = SourceSalesforce()
    source.streams = Mock()
    source.streams.return_value = [stream_1, stream_2]

    logger = AirbyteLogger()

    next_page_url = "/services/data/v52.0/query/012345"
    response_1 = {
        "done":
        False,
        "totalSize":
        10,
        "nextRecordsUrl":
        next_page_url,
        "records": [
            {
                "ID": 1,
                "LastModifiedDate": "2021-11-15",
            },
            {
                "ID": 2,
                "LastModifiedDate": "2021-11-16",
            },
            {
                "ID": 3,
                "LastModifiedDate": "2021-11-17",  # check point interval
            },
            {
                "ID": 4,
                "LastModifiedDate": "2021-11-18",
            },
            {
                "ID": 5,
                "LastModifiedDate": "2021-11-19",
            },
        ],
    }
    response_2 = [{
        "errorCode": "REQUEST_LIMIT_EXCEEDED",
        "message": "TotalRequests Limit exceeded."
    }]

    with requests_mock.Mocker() as m:
        m.register_uri("GET",
                       stream_1.path(),
                       json=response_1,
                       status_code=200)
        m.register_uri("GET", next_page_url, json=response_2, status_code=403)

        result = [
            i for i in source.read(logger=logger,
                                   config=stream_config,
                                   catalog=configured_catalog,
                                   state=state)
        ]

        assert stream_1.request_params.called
        assert (
            not stream_2.request_params.called
        ), "The second stream should not be executed, because the first stream finished with Rate Limit."

        records = [item for item in result if item.type == Type.RECORD]
        assert len(records) == 5

        state_record = [item for item in result if item.type == Type.STATE][0]
        assert state_record.state.data["Account"][
            "LastModifiedDate"] == "2021-11-17"
Exemple #12
0
def test_rate_limit_bulk(stream_config, stream_api, configured_catalog, state):
    """
    Connector should stop the sync if one stream reached rate limit
    stream_1, stream_2, stream_3, ...
    While reading `stream_1` if 403 (Rate Limit) is received, it should finish that stream with success and stop the sync process.
    Next streams should not be executed.
    """
    stream_1: BulkIncrementalSalesforceStream = generate_stream(
        "Account", stream_config, stream_api)
    stream_2: BulkIncrementalSalesforceStream = generate_stream(
        "Asset", stream_config, stream_api)
    streams = [stream_1, stream_2]
    configure_request_params_mock(stream_1, stream_2)

    stream_1.page_size = 6
    stream_1.state_checkpoint_interval = 5

    source = SourceSalesforce()
    source.streams = Mock()
    source.streams.return_value = streams
    logger = AirbyteLogger()

    json_response = [{
        "errorCode": "REQUEST_LIMIT_EXCEEDED",
        "message": "TotalRequests Limit exceeded."
    }]
    with requests_mock.Mocker() as m:
        for stream in streams:
            creation_responses = []
            for page in [1, 2]:
                job_id = f"fake_job_{page}_{stream.name}"
                creation_responses.append({"json": {"id": job_id}})

                m.register_uri("GET",
                               stream.path() + f"/{job_id}",
                               json={"state": "JobComplete"})

                resp = ["Field1,LastModifiedDate,ID"
                        ] + [f"test,2021-11-0{i},{i}"
                             for i in range(1, 7)]  # 6 records per page

                if page == 1:
                    # Read the first page successfully
                    m.register_uri("GET",
                                   stream.path() + f"/{job_id}/results",
                                   text="\n".join(resp))
                else:
                    # Requesting for results when reading second page should fail with 403 (Rate Limit error)
                    m.register_uri("GET",
                                   stream.path() + f"/{job_id}/results",
                                   status_code=403,
                                   json=json_response)

                m.register_uri("DELETE", stream.path() + f"/{job_id}")

            m.register_uri("POST", stream.path(), creation_responses)

        result = [
            i for i in source.read(logger=logger,
                                   config=stream_config,
                                   catalog=configured_catalog,
                                   state=state)
        ]
        assert stream_1.request_params.called
        assert (
            not stream_2.request_params.called
        ), "The second stream should not be executed, because the first stream finished with Rate Limit."

        records = [item for item in result if item.type == Type.RECORD]
        assert len(records) == 6  # stream page size: 6

        state_record = [item for item in result if item.type == Type.STATE][0]
        assert state_record.state.data["Account"][
            "LastModifiedDate"] == "2021-11-05"  # state checkpoint interval is 5.
Exemple #13
0
def test_stream_start_datetime_format_should_not_changed(
        stream_config, stream_api):
    stream: IncrementalSalesforceStream = generate_stream(
        "ActiveFeatureLicenseMetric", stream_config, stream_api)
    assert stream.start_date == "2010-01-18T21:18:20Z"
Exemple #14
0
def test_stream_start_date_should_be_converted_to_datetime_format(
        stream_config_date_format, stream_api):
    stream: IncrementalSalesforceStream = generate_stream(
        "ActiveFeatureLicenseMetric", stream_config_date_format, stream_api)
    assert stream.start_date == "2010-01-18T00:00:00Z"