Example #1
    def handle_call_rate_limit(self, response, params):
        if "batch" in params:
            max_call_count = 0
            max_pause_interval = self.pause_interval

            for record in response.json():
                headers = {
                    header["name"].lower(): header["value"]
                    for header in record["headers"]
                }
                call_count, pause_interval = self.parse_call_rate_header(
                    headers)
                max_call_count = max(max_call_count, call_count)
                max_pause_interval = max(max_pause_interval, pause_interval)

            if max_call_count > self.call_rate_threshold:
                logger.warning(
                    f"Utilization is too high ({max_call_count}%), pausing for {max_pause_interval}"
                )
                sleep(max_pause_interval.total_seconds())
        else:
            headers = response.headers()
            call_count, pause_interval = self.parse_call_rate_header(headers)
            if call_count > self.call_rate_threshold or pause_interval:
                logger.warning(
                    f"Utilization is too high ({call_count}%), pausing for {pause_interval}"
                )
                sleep(pause_interval.total_seconds())
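Examples #1, #2, and #4 all delegate to a parse_call_rate_header helper that is not shown; the calling code only tells us it takes a header mapping and returns a (usage, pause_interval) pair, where pause_interval must support max() and .total_seconds(). Below is a minimal standalone sketch of such a helper, assuming a Graph-API-style "x-app-usage" JSON header; the header name and payload keys are assumptions, not part of the examples, and in the examples it would be a method on the stream class.

import json
from datetime import timedelta


def parse_call_rate_header(headers):
    """Return (usage, pause_interval) parsed from rate-limit response headers.

    Hypothetical sketch: the examples above only show how the result is
    consumed, so the header name and payload keys below are assumptions.
    """
    usage = 0
    pause_interval = timedelta()  # zero duration: no explicit back-off advertised

    # Graph-API-style utilization header carrying a JSON document such as
    # {"call_count": 28, "total_time": 25, "total_cputime": 25}.
    usage_header = headers.get("x-app-usage")
    if usage_header:
        data = json.loads(usage_header)
        usage = max(
            data.get("call_count", 0),
            data.get("total_time", 0),
            data.get("total_cputime", 0),
        )
        # Key name is an assumption; some rate-limit headers advertise how long
        # to wait before access is regained.
        pause_interval = timedelta(minutes=data.get("estimated_time_to_regain_access", 0))

    return usage, pause_interval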
Example #2
    def handle_call_rate_limit(self, response, params):
        if "batch" in params:
            max_usage = 0
            max_pause_interval = self.pause_interval_minimum

            for record in response.json():
                headers = {
                    header["name"].lower(): header["value"]
                    for header in record["headers"]
                }
                usage, pause_interval = self.parse_call_rate_header(headers)
                max_usage = max(max_usage, usage)
                max_pause_interval = max(max_pause_interval, pause_interval)

            if max_usage > self.call_rate_threshold:
                max_pause_interval = max(max_pause_interval,
                                         self.pause_interval_minimum)
                logger.warning(
                    f"Utilization is too high ({max_usage}%), pausing for {max_pause_interval}"
                )
                sleep(max_pause_interval.total_seconds())
        else:
            headers = response.headers()
            usage, pause_interval = self.parse_call_rate_header(headers)
            if usage > self.call_rate_threshold or pause_interval:
                pause_interval = max(pause_interval,
                                     self.pause_interval_minimum)
                logger.warning(
                    f"Utilization is too high ({usage}%), pausing for {pause_interval}"
                )
                sleep(pause_interval.total_seconds())
Example #3
    def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
        """
        Create and retrieve the report.
        Decrypt and parse the report once it is fully processed, then yield the report document records.
        """
        report_payload = {}
        is_processed = False
        is_done = False
        start_time = pendulum.now("utc")
        seconds_waited = 0
        report_id = self._create_report()["reportId"]

        # create and retrieve the report
        while not is_processed and seconds_waited < REPORTS_MAX_WAIT_SECONDS:
            report_payload = self._retrieve_report(report_id=report_id)
            seconds_waited = (pendulum.now("utc") - start_time).seconds
            is_processed = report_payload.get("processingStatus") not in ["IN_QUEUE", "IN_PROGRESS"]
            is_done = report_payload.get("processingStatus") == "DONE"
            time.sleep(self.sleep_seconds)

        if is_done:
            # retrieve and decrypt the report document
            document_id = report_payload["reportDocumentId"]
            request_headers = self.request_headers()
            request = self._create_prepared_request(
                path=self.path(document_id=document_id),
                headers=dict(request_headers, **self.authenticator.get_auth_header()),
                params=self.request_params(),
            )
            response = self._send_request(request)
            yield from self.parse_response(response)
        else:
            logger.warn(f"There are no report document related in stream `{self.name}`. Report body {report_payload}")
Example #4
    def handle_call_rate_limit(self, response, params):
        headers = response.headers()
        call_count, pause_interval = self.parse_call_rate_header(headers)
        if call_count > self.call_rate_threshold or pause_interval:
            logger.warning(
                f"Utilization is too high ({call_count}%), pausing for {pause_interval}"
            )
            sleep(pause_interval.total_seconds())
Example #5
    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        """
        Create and retrieve the report.
        Decrypt and parse the report once it is fully processed, then yield the report document records.
        """
        report_payload = {}
        is_processed = False
        is_done = False
        start_time = pendulum.now("utc")
        seconds_waited = 0
        report_id = self._create_report(sync_mode, cursor_field, stream_slice,
                                        stream_state)["reportId"]

        # create and retrieve the report
        while not is_processed and seconds_waited < self.max_wait_seconds:
            report_payload = self._retrieve_report(report_id=report_id)
            seconds_waited = (pendulum.now("utc") - start_time).seconds
            is_processed = report_payload.get("processingStatus") not in [
                "IN_QUEUE", "IN_PROGRESS"
            ]
            is_done = report_payload.get("processingStatus") == "DONE"
            is_cancelled = report_payload.get(
                "processingStatus") == "CANCELLED"
            is_fatal = report_payload.get("processingStatus") == "FATAL"
            time.sleep(self.sleep_seconds)

        if is_done:
            # retrieve and decrypt the report document
            document_id = report_payload["reportDocumentId"]
            request_headers = self.request_headers()
            request = self._create_prepared_request(
                path=self.path(document_id=document_id),
                headers=dict(request_headers,
                             **self.authenticator.get_auth_header()),
                params=self.request_params(),
            )
            response = self._send_request(request)
            yield from self.parse_response(response)
        elif is_fatal:
            raise Exception(
                f"The report for stream '{self.name}' was aborted due to a fatal error"
            )
        elif is_cancelled:
            logger.warning(
                f"The report for stream '{self.name}' was cancelled or there is no data to return"
            )
        else:
            raise Exception(
                f"Unknown response for stream `{self.name}`. Response body {report_payload}"
            )
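The polling loop in Examples #3 and #5 only depends on _retrieve_report returning a payload whose processingStatus moves through IN_QUEUE / IN_PROGRESS to DONE, CANCELLED, or FATAL, with a reportDocumentId present on success. The following self-contained sketch isolates that contract; the retriever stub and its payload are hypothetical and shown for illustration only.

import time


def poll_report(retrieve_report, report_id, max_wait_seconds=300, sleep_seconds=30):
    """Standalone version of the wait loop used in Examples #3 and #5."""
    payload = {}
    seconds_waited = 0
    while seconds_waited < max_wait_seconds:
        payload = retrieve_report(report_id=report_id)
        # Any status other than queued/in-progress ends the wait.
        if payload.get("processingStatus") not in ("IN_QUEUE", "IN_PROGRESS"):
            break
        time.sleep(sleep_seconds)
        seconds_waited += sleep_seconds
    return payload


# Stubbed retriever, used only to illustrate the expected payload shape.
def fake_retrieve_report(report_id):
    return {"processingStatus": "DONE", "reportDocumentId": "doc-123"}


print(poll_report(fake_retrieve_report, report_id="report-1", sleep_seconds=0))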
Example #6
    def _get_field_props(field_type: str) -> Mapping[str, List[str]]:

        if field_type in VALID_JSON_SCHEMA_TYPES:
            return {
                "type": ["null", field_type],
            }

        converted_type, field_format = KNOWN_CONVERTIBLE_SCHEMA_TYPES.get(
            field_type) or (None, None)

        if not converted_type:
            converted_type = "string"
            logger.warn(f"Unsupported type {field_type} found")

        field_props = {
            "type": ["null", converted_type or field_type],
        }

        if field_format:
            field_props["format"] = field_format

        return field_props
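Example #6 relies on two module-level constants that are not shown: a set of JSON Schema types passed through unchanged (VALID_JSON_SCHEMA_TYPES) and a mapping from source-specific field types to a (JSON Schema type, format) pair (KNOWN_CONVERTIBLE_SCHEMA_TYPES). A plausible sketch with illustrative values only; the real contents depend on the connector.

# Illustrative values only (assumptions); the real constants are defined
# elsewhere in the connector and are not shown in Example #6.
VALID_JSON_SCHEMA_TYPES = {"string", "number", "integer", "boolean", "object", "array"}

# source field type -> (JSON Schema type, format)
KNOWN_CONVERTIBLE_SCHEMA_TYPES = {
    "date": ("string", "date"),
    "datetime": ("string", "date-time"),
    "phone_number": ("string", None),
}

# With these in scope, _get_field_props("datetime") would return
# {"type": ["null", "string"], "format": "date-time"}, while an unknown type
# such as "geolocation" would fall back to {"type": ["null", "string"]}.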