Code Example #1
File: streams.py · Project: Mu-L/airbyte
    def request_params(self,
                       stream_state: Mapping[str, Any],
                       next_page_token: Mapping[str, Any] = None,
                       **kwargs) -> MutableMapping[str, Any]:
        if next_page_token:
            return dict(next_page_token)

        # for finance APIs, end date-time must be no later than two minutes before the request was submitted
        end_date = pendulum.now("utc").subtract(minutes=2, seconds=10).strftime(DATE_TIME_FORMAT)
        if self._replication_end_date:
            end_date = self._replication_end_date

        # start date and end date should not be more than 180 days apart.
        start_date = max(
            pendulum.parse(self._replication_start_date),
            pendulum.parse(end_date).subtract(days=180),
        ).strftime(DATE_TIME_FORMAT)

        # log the start date actually used so the user knows which value was applied
        logger.info("start date used: %s", start_date)

        params = {
            self.replication_start_date_field: start_date,
            self.replication_end_date_field: end_date,
            self.page_size_field: self.page_size,
        }
        return params
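
The window-clamping logic above can be exercised on its own. A minimal standalone sketch, where DATE_TIME_FORMAT and the replication start date are assumed values rather than the connector's real configuration:

import pendulum

DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"  # assumed ISO-8601-style format
replication_start_date = "2021-01-01T00:00:00Z"  # hypothetical config value

# End date: no later than two minutes (plus a small margin) before "now", as in the snippet
end_date = pendulum.now("utc").subtract(minutes=2, seconds=10).strftime(DATE_TIME_FORMAT)

# Clamp the start date so the window never spans more than 180 days
start_date = max(
    pendulum.parse(replication_start_date),
    pendulum.parse(end_date).subtract(days=180),
).strftime(DATE_TIME_FORMAT)

print(f"start date used: {start_date}, end date: {end_date}")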
Code Example #2
def sleep_on_ratelimit(_details):
    _, exc, _ = sys.exc_info()
    if isinstance(exc, FreshdeskRateLimited):
        retry_after = int(exc.response.headers["Retry-After"])
        logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
        time.sleep(retry_after + 1)  # extra second to cover any fraction of a second
Code Example #3
def state(self, value):
    potentially_new_records_in_the_past = self._include_deleted and not value.get("include_deleted", False)
    if potentially_new_records_in_the_past:
        logger.info(f"Ignoring bookmark for {self.name} because of enabled `include_deleted` option")
    else:
        self._state = pendulum.parse(value[self.state_pk])
Code Example #4
File: api.py · Project: yevhenii-ldv/airbyte
def sleep_on_ratelimit(_details):
    _, exc, _ = sys.exc_info()
    if isinstance(exc, HubspotRateLimited):
        # The HubSpot API does not always return a Retry-After value for a 429 HTTP error
        retry_after = int(exc.response.headers.get("Retry-After", 3))
        logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
        time.sleep(retry_after + 1)  # extra second to cover any fraction of a second
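
Handlers with this (details) signature that read sys.exc_info() are typically passed to the backoff library's on_backoff hook. A minimal sketch of that wiring, assuming backoff drives the retries; RateLimited and call_api below are placeholders, not the connectors' real classes:

import logging
import sys
import time

import backoff
import requests

logger = logging.getLogger(__name__)


class RateLimited(Exception):
    """Placeholder for connector-specific errors such as HubspotRateLimited."""

    def __init__(self, response):
        self.response = response


def sleep_on_ratelimit(_details):
    _, exc, _ = sys.exc_info()
    if isinstance(exc, RateLimited):
        retry_after = int(exc.response.headers.get("Retry-After", 3))
        logger.info(f"Rate limit reached. Sleeping for {retry_after} seconds")
        time.sleep(retry_after + 1)  # extra second to cover any fraction of a second


# backoff itself waits 0 seconds between tries; the handler above does the actual sleeping
@backoff.on_exception(backoff.constant, RateLimited, interval=0, max_tries=5, on_backoff=sleep_on_ratelimit)
def call_api(url):
    response = requests.get(url)
    if response.status_code == 429:
        raise RateLimited(response)
    response.raise_for_status()
    return response.json()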
Code Example #5
File: client.py · Project: Mu-L/airbyte
    def _open(self, binary):
        mode = "rb" if binary else "r"
        storage = self.storage_scheme
        url = self.url

        if storage == "gs://":
            return self._open_gcs_url(binary=binary)
        elif storage == "s3://":
            return self._open_aws_url(binary=binary)
        elif storage == "azure://":
            return self._open_azblob_url(binary=binary)
        elif storage == "webhdfs://":
            host = self._provider["host"]
            port = self._provider["port"]
            return smart_open.open(f"webhdfs://{host}:{port}/{url}", mode=mode)
        elif storage in ("ssh://", "scp://", "sftp://"):
            user = self._provider["user"]
            host = self._provider["host"]
            # TODO: Remove int casting when https://github.com/airbytehq/airbyte/issues/4952 is addressed
            # TODO: The "port" field in spec.json must also be changed
            _port_value = self._provider.get("port", 22)
            try:
                port = int(_port_value)
            except ValueError as err:
                raise ValueError(
                    f"{_port_value} is not a valid integer for the port"
                ) from err
            # Explicitly turn off ssh keys stored in ~/.ssh
            transport_params = {"connect_kwargs": {"look_for_keys": False}}
            if "password" in self._provider:
                password = self._provider["password"]
                uri = f"{storage}{user}:{password}@{host}:{port}/{url}"
            else:
                uri = f"{storage}{user}@{host}:{port}/{url}"
            return smart_open.open(uri,
                                   transport_params=transport_params,
                                   mode=mode)
        elif storage in ("https://", "http://"):
            transport_params = None
            if self._provider["user_agent"]:
                airbyte_version = environ.get("AIRBYTE_VERSION", "0.0")
                transport_params = {
                    "headers": {
                        "Accept-Encoding": "identity",
                        "User-Agent": f"Airbyte/{airbyte_version}"
                    }
                }
            logger.info(f"TransportParams: {transport_params}")
            return smart_open.open(
                self.full_url,
                mode=mode,
                transport_params=transport_params,
            )
        return smart_open.open(self.full_url, mode=mode)
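
The branches above read connection details from self._provider. As an illustration only, a provider mapping for the ssh/scp/sftp branch might look like the following; the key names come from the snippet, every value is made up:

provider = {
    "user": "sync_user",
    "password": "***",            # optional: without it the URI is built without credentials
    "host": "files.example.com",
    "port": "2222",               # may arrive as a string, hence the int() cast above
}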
Code Example #6
File: api.py · Project: coetzeevs/airbyte
    def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator:
        """Apply state filter to set of records, update cursor(state) if necessary in the end"""
        params = params or {}
        latest_cursor = None
        for record in super().read(getter, params):
            cursor = pendulum.parse(record[self.state_pk])
            if self._state and self._state.subtract(days=self.buffer_days + 1) >= cursor:
                continue
            latest_cursor = max(cursor, latest_cursor) if latest_cursor else cursor
            yield record

        if latest_cursor:
            logger.info(f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}")
            self._state = max(latest_cursor, self._state) if self._state else latest_cursor
Code Example #7
    def consume(self, credit: int):
        # Reset time window if it has elapsed
        if time.time() > self._current_period_start + self._balance_reload_period:
            self.reset_period()

        if self._credits_consumed + credit >= self._max_balance:
            sleep_time = self._balance_reload_period - (time.time() - self._current_period_start)
            logger.info(f"Reached call limit for this minute, wait for {sleep_time:.2f} seconds")
            time.sleep(max(1.0, sleep_time))
            self.reset_period()

        self._credits_consumed += credit
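
consume() relies on a reset_period() helper and several attributes that are not shown. A minimal sketch of the surrounding limiter class; the class name and constructor defaults are assumptions, only the attribute names come from the snippet:

import logging
import time

logger = logging.getLogger(__name__)


class CreditBalanceLimiter:
    """Sketch: allow at most `max_balance` credits per `balance_reload_period` seconds."""

    def __init__(self, max_balance: int = 100, balance_reload_period: int = 60):
        self._max_balance = max_balance
        self._balance_reload_period = balance_reload_period
        self.reset_period()

    def reset_period(self):
        # Start a fresh window with an empty credit balance
        self._current_period_start = time.time()
        self._credits_consumed = 0

    def consume(self, credit: int):
        # Reset time window if it has elapsed
        if time.time() > self._current_period_start + self._balance_reload_period:
            self.reset_period()

        # If this call would exhaust the balance, sleep until the window rolls over
        if self._credits_consumed + credit >= self._max_balance:
            sleep_time = self._balance_reload_period - (time.time() - self._current_period_start)
            logger.info(f"Reached call limit for this minute, wait for {sleep_time:.2f} seconds")
            time.sleep(max(1.0, sleep_time))
            self.reset_period()

        self._credits_consumed += credit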
Code Example #8
File: api.py · Project: yevhenii-ldv/airbyte
    def read_chunked(
        self,
        getter: Callable,
        params: Mapping[str, Any] = None,
        chunk_size: pendulum.duration = pendulum.duration(days=1)
    ) -> Iterator:
        params = {**params} if params else {}
        now_ts = int(pendulum.now().timestamp() * 1000)
        start_ts = int(self._start_date.timestamp() * 1000)
        chunk_size = int(chunk_size.total_seconds() * 1000)

        for ts in range(start_ts, now_ts, chunk_size):
            end_ts = ts + chunk_size
            params["startTimestamp"] = ts
            params["endTimestamp"] = end_ts
            logger.info(
                f"Reading chunk from stream {self.name} between {pendulum.from_timestamp(ts / 1000)} and {pendulum.from_timestamp(end_ts / 1000)}"
            )
            yield from super().read(getter, params)
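
Because the chunk boundaries are computed in epoch milliseconds, the windows that read_chunked() iterates over can be previewed with a small standalone sketch (the start date below is hypothetical):

import pendulum

start_date = pendulum.parse("2021-06-01T00:00:00Z")  # hypothetical _start_date
chunk = pendulum.duration(days=1)

now_ts = int(pendulum.now().timestamp() * 1000)
start_ts = int(start_date.timestamp() * 1000)
chunk_ms = int(chunk.total_seconds() * 1000)

# Each iteration covers one [ts, ts + chunk_ms) window, exactly as in read_chunked()
for ts in range(start_ts, now_ts, chunk_ms):
    end_ts = ts + chunk_ms
    print(pendulum.from_timestamp(ts / 1000), "->", pendulum.from_timestamp(end_ts / 1000))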
Code Example #9
File: api.py · Project: Mu-L/airbyte
    def read(self,
             getter: Callable,
             params: Mapping[str, Any] = None) -> Iterator:
        """Update cursor(state)"""
        params = params or {}
        cursor = None
        for record in super().read(getter, params):
            "Report API return records from newest to oldest"
            if not cursor:
                cursor = pendulum.parse(record[self.state_pk])
            record[self.state_pk] = pendulum.parse(
                record[self.state_pk]).isoformat()
            yield record

        if cursor:
            new_state = max(cursor, self._state) if self._state else cursor
            if new_state != self._state:
                logger.info(
                    f"Advancing bookmark for {self.name} stream from {self._state} to {new_state}"
                )
                self._state = new_state
Code Example #10
File: api.py · Project: coetzeevs/airbyte
    def _run_job_until_completion(self, params) -> AdReportRun:
        # TODO parallelize running these jobs
        job = self._get_insights(params)
        logger.info(f"Created AdReportRun: {job} to sync insights with breakdown {self.breakdowns}")
        start_time = pendulum.now()
        sleep_seconds = 2
        while True:
            job = job.api_get()
            job_progress_pct = job["async_percent_completion"]
            logger.info(f"ReportRunId {job['report_run_id']} is {job_progress_pct}% complete")
            runtime = pendulum.now() - start_time

            if job["async_status"] == "Job Completed":
                return job
            elif job["async_status"] == "Job Failed":
                raise JobTimeoutException(f"AdReportRun {job} failed after {runtime.in_seconds()} seconds.")
            elif job["async_status"] == "Job Skipped":
                raise JobTimeoutException(f"AdReportRun {job} skipped after {runtime.in_seconds()} seconds.")

            if runtime > self.MAX_WAIT_TO_START and job_progress_pct == 0:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not start after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            elif runtime > self.MAX_WAIT_TO_FINISH:
                raise JobTimeoutException(
                    f"AdReportRun {job} did not finish after {runtime.in_seconds()} seconds. This is an intermittent error which may be fixed by retrying the job. Aborting."
                )
            logger.info(f"Sleeping {sleep_seconds} seconds while waiting for AdReportRun: {job} to complete")
            time.sleep(sleep_seconds)
            if sleep_seconds < self.MAX_ASYNC_SLEEP.in_seconds():
                sleep_seconds *= 2
Code Example #11
    def read(self,
             getter: Callable,
             params: Mapping[str, Any] = None) -> Iterator:
        """Read using getter, patched to respect current state"""
        params = params or {}
        params = {**params, **self._state_params()}
        latest_cursor = None
        for record in super().read(getter, params):
            cursor = pendulum.parse(record[self.state_pk])
            # filter out records older than the state
            if self._state and self._state >= cursor:
                continue
            latest_cursor = max(cursor,
                                latest_cursor) if latest_cursor else cursor
            yield record

        if latest_cursor:
            logger.info(
                f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}"
            )
            self._state = max(latest_cursor,
                              self._state) if self._state else latest_cursor
Code Example #12
File: api.py · Project: yevhenii-ldv/airbyte
    def read(self,
             getter: Callable,
             params: Mapping[str, Any] = None) -> Iterator:
        """Apply state filter to set of records, update cursor(state) if necessary in the end"""
        latest_cursor = None
        # To track state correctly: there is no guarantee that the returned records are sorted in ascending
        # order, so we keep the exact boundary to make sure no records between states are missed. If we ever
        # want to save the state more often, we could do this per batch.
        for record in self.read_chunked(getter, params):
            yield record
            cursor = self._field_to_datetime(record[self.updated_at_field])
            latest_cursor = max(cursor,
                                latest_cursor) if latest_cursor else cursor

        if latest_cursor:
            new_state = max(latest_cursor,
                            self._state) if self._state else latest_cursor
            if new_state != self._state:
                logger.info(
                    f"Advancing bookmark for {self.name} stream from {self._state} to {latest_cursor}"
                )
                self._state = new_state
                self._start_date = self._state
Code Example #13
File: common.py · Project: zestyping/airbyte
def handle_call_rate_response(exc: FacebookRequestError) -> bool:
    pause_time = DEFAULT_SLEEP_INTERVAL
    platform_header = exc.http_headers().get("x-app-usage") or exc.http_headers().get("x-ad-account-usage")
    if platform_header:
        platform_header = json.loads(platform_header)
        call_count = platform_header.get("call_count") or platform_header.get("acc_id_util_pct")
        if call_count and call_count > 99:
            logger.info(f"Reached platform call limit: {exc}")

    buc_header = exc.http_headers().get("x-business-use-case-usage")
    buc_header = json.loads(buc_header) if buc_header else {}
    for business_object_id, stats in buc_header.items():
        if stats.get("call_count", 0) > 99:
            logger.info(f"Reached call limit on {stats['type']}: {exc}")
            pause_time = max(pause_time, stats["estimated_time_to_regain_access"])
    logger.info(f"Sleeping for {pause_time.total_seconds()} seconds")
    sleep(pause_time.total_seconds())

    return True
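
The boolean return value suggests handle_call_rate_response() is used to decide whether a call should be retried after the sleep. A minimal sketch of that pattern, assuming a plain retry loop; MAX_ATTEMPTS and call_with_call_rate_handling() are illustrative, not the connector's real code:

from facebook_business.exceptions import FacebookRequestError

MAX_ATTEMPTS = 5  # assumed retry budget


def call_with_call_rate_handling(request_fn, *args, **kwargs):
    """Retry a Facebook API call after handle_call_rate_response() has slept through the pause."""
    for _ in range(MAX_ATTEMPTS):
        try:
            return request_fn(*args, **kwargs)
        except FacebookRequestError as exc:
            # handle_call_rate_response() (defined above) sleeps and returns True if a retry makes sense
            if not handle_call_rate_response(exc):
                raise
    raise RuntimeError(f"Giving up after {MAX_ATTEMPTS} attempts")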
Code Example #14
def log_retry_attempt(details):
    _, exc, _ = sys.exc_info()
    logger.info(str(exc))
    logger.info(
        f"Caught retryable error after {details['tries']} tries. Waiting {details['wait']} more seconds then retrying..."
    )