def handle_call_rate_limit(self, response, params):
    if "batch" in params:
        # Batch responses carry per-record headers; track the worst-case usage and pause interval.
        max_call_count = 0
        max_pause_interval = self.pause_interval
        for record in response.json():
            headers = {header["name"].lower(): header["value"] for header in record["headers"]}
            call_count, pause_interval = self.parse_call_rate_header(headers)
            max_call_count = max(max_call_count, call_count)
            max_pause_interval = max(max_pause_interval, pause_interval)
        if max_call_count > self.call_rate_threshold:
            logger.warn(f"Utilization is too high ({max_call_count}%), pausing for {max_pause_interval}")
            sleep(max_pause_interval.total_seconds())
    else:
        headers = response.headers  # `headers` is a property on the response, not a method
        call_count, pause_interval = self.parse_call_rate_header(headers)
        if call_count > self.call_rate_threshold or pause_interval:
            logger.warn(f"Utilization is too high ({call_count}%), pausing for {pause_interval}")
            sleep(pause_interval.total_seconds())
def handle_call_rate_limit(self, response, params):
    if "batch" in params:
        # Batch responses carry per-record headers; track the worst-case usage and pause interval.
        max_usage = 0
        max_pause_interval = self.pause_interval_minimum
        for record in response.json():
            headers = {header["name"].lower(): header["value"] for header in record["headers"]}
            usage, pause_interval = self.parse_call_rate_header(headers)
            max_usage = max(max_usage, usage)
            max_pause_interval = max(max_pause_interval, pause_interval)
        if max_usage > self.call_rate_threshold:
            max_pause_interval = max(max_pause_interval, self.pause_interval_minimum)
            logger.warn(f"Utilization is too high ({max_usage}%), pausing for {max_pause_interval}")
            sleep(max_pause_interval.total_seconds())
    else:
        headers = response.headers  # `headers` is a property on the response, not a method
        usage, pause_interval = self.parse_call_rate_header(headers)
        if usage > self.call_rate_threshold or pause_interval:
            pause_interval = max(pause_interval, self.pause_interval_minimum)
            logger.warn(f"Utilization is too high ({usage}%), pausing for {pause_interval}")
            sleep(pause_interval.total_seconds())
def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
    """
    Create and retrieve the report. Decrypt and parse the report once it is fully processed,
    then yield the report document records.
    """
    report_payload = {}
    is_processed = False
    is_done = False
    start_time = pendulum.now("utc")
    seconds_waited = 0
    report_id = self._create_report()["reportId"]

    # Poll the report status until it is processed or the wait limit is reached.
    while not is_processed and seconds_waited < REPORTS_MAX_WAIT_SECONDS:
        report_payload = self._retrieve_report(report_id=report_id)
        seconds_waited = (pendulum.now("utc") - start_time).seconds
        is_processed = report_payload.get("processingStatus") not in ["IN_QUEUE", "IN_PROGRESS"]
        is_done = report_payload.get("processingStatus") == "DONE"
        time.sleep(self.sleep_seconds)

    if is_done:
        # Retrieve and decrypt the report document.
        document_id = report_payload["reportDocumentId"]
        request_headers = self.request_headers()
        request = self._create_prepared_request(
            path=self.path(document_id=document_id),
            headers=dict(request_headers, **self.authenticator.get_auth_header()),
            params=self.request_params(),
        )
        response = self._send_request(request)
        yield from self.parse_response(response)
    else:
        logger.warn(f"There is no report document for stream `{self.name}`. Report body {report_payload}")
def handle_call_rate_limit(self, response, params):
    headers = response.headers  # `headers` is a property on the response, not a method
    call_count, pause_interval = self.parse_call_rate_header(headers)
    if call_count > self.call_rate_threshold or pause_interval:
        logger.warn(f"Utilization is too high ({call_count}%), pausing for {pause_interval}")
        sleep(pause_interval.total_seconds())
def read_records(
    self,
    sync_mode: SyncMode,
    cursor_field: List[str] = None,
    stream_slice: Mapping[str, Any] = None,
    stream_state: Mapping[str, Any] = None,
) -> Iterable[Mapping[str, Any]]:
    """
    Create and retrieve the report. Decrypt and parse the report once it is fully processed,
    then yield the report document records.
    """
    report_payload = {}
    is_processed = False
    is_done = False
    start_time = pendulum.now("utc")
    seconds_waited = 0
    report_id = self._create_report(sync_mode, cursor_field, stream_slice, stream_state)["reportId"]

    # Poll the report status until it is processed or the wait limit is reached.
    while not is_processed and seconds_waited < self.max_wait_seconds:
        report_payload = self._retrieve_report(report_id=report_id)
        seconds_waited = (pendulum.now("utc") - start_time).seconds
        is_processed = report_payload.get("processingStatus") not in ["IN_QUEUE", "IN_PROGRESS"]
        is_done = report_payload.get("processingStatus") == "DONE"
        is_cancelled = report_payload.get("processingStatus") == "CANCELLED"
        is_fatal = report_payload.get("processingStatus") == "FATAL"
        time.sleep(self.sleep_seconds)

    if is_done:
        # Retrieve and decrypt the report document.
        document_id = report_payload["reportDocumentId"]
        request_headers = self.request_headers()
        request = self._create_prepared_request(
            path=self.path(document_id=document_id),
            headers=dict(request_headers, **self.authenticator.get_auth_header()),
            params=self.request_params(),
        )
        response = self._send_request(request)
        yield from self.parse_response(response)
    elif is_fatal:
        raise Exception(f"The report for stream '{self.name}' was aborted due to a fatal error")
    elif is_cancelled:
        logger.warn(f"The report for stream '{self.name}' was cancelled or there is no data to return")
    else:
        raise Exception(f"Unknown response for stream `{self.name}`. Response body {report_payload}")
def _get_field_props(field_type: str) -> Mapping[str, List[str]]:
    if field_type in VALID_JSON_SCHEMA_TYPES:
        return {
            "type": ["null", field_type],
        }

    converted_type, field_format = KNOWN_CONVERTIBLE_SCHEMA_TYPES.get(field_type) or (None, None)

    if not converted_type:
        converted_type = "string"
        logger.warn(f"Unsupported type {field_type} found")

    field_props = {
        "type": ["null", converted_type or field_type],
    }

    if field_format:
        field_props["format"] = field_format

    return field_props
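For context, a minimal, self-contained sketch of how this type-mapping helper behaves, assuming it is accessible at module level; the contents of VALID_JSON_SCHEMA_TYPES and KNOWN_CONVERTIBLE_SCHEMA_TYPES below are illustrative assumptions, not the connector's actual mappings.

import logging

logger = logging.getLogger(__name__)

# Assumed example mappings, for illustration only.
VALID_JSON_SCHEMA_TYPES = {"string", "integer", "number", "boolean"}
KNOWN_CONVERTIBLE_SCHEMA_TYPES = {
    "date": ("string", "date"),
    "datetime": ("string", "date-time"),
}

print(_get_field_props("integer"))   # {'type': ['null', 'integer']}
print(_get_field_props("datetime"))  # {'type': ['null', 'string'], 'format': 'date-time'}
print(_get_field_props("blob"))      # logs a warning, falls back to {'type': ['null', 'string']}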