Example #1
    def __init__(self, session_id=None, language_code='en'):
        # Generate the ID here rather than in the signature: a default of
        # uuid4() would be evaluated only once and shared by every instance.
        self.session_id = session_id if session_id is not None else uuid4()
        self._session = session.session_path(project_id, self.session_id)
        self._kb = kb.project_path(project_id)
        self.language_code = language_code
        self.min_confidence = 0.8
        self._retry = {'retry': Retry(), 'timeout': 10}
Example #2
def get_table_with_retry(client, ref):
    while True:
        try:
            return client.get_table(ref, retry=Retry(deadline=60), timeout=5)
        except ReadTimeout as e:
            print(e)
            print('Retrying...')
Example #3
def test_bulkwritebatch_write_w_retry_timeout():
    from google.api_core.retry import Retry

    retry = Retry(predicate=object())
    timeout = 123.0

    _write_helper(retry=retry, timeout=timeout)
Example #4
def _make_unary_call(client, customer_id):
    """Makes a unary call using a custom client timeout.

    Args:
        client: An initialized GoogleAds client.
        customer_id: The Google Ads customer ID.
    """
    ga_service = client.get_service("GoogleAdsService")
    campaign_ids = []

    try:
        search_request = client.get_type("SearchGoogleAdsRequest")
        search_request.customer_id = customer_id
        search_request.query = _QUERY
        results = ga_service.search(
            request=search_request,
            # As of v5, any unary call is retryable and has default retry
            # settings. Complete information about these settings can be found
            # here: https://googleapis.dev/python/google-api-core/latest/retry.html
            #
            # For this particular call, the default retry settings can be found
            # in the following file:
            # https://github.com/googleads/google-ads-python/blob/master/google/ads/google_ads/v6/services/google_ads_service_client_config.py
            #
            # When making a unary call, an optional retry argument can be
            # passed to override these default retry settings.
            retry=Retry(
                # Limits the total time spent across all attempts, in
                # seconds; this budget includes every retry.
                deadline=_CLIENT_TIMEOUT_SECONDS,
                # Initial delay before the first retry, set to one tenth of
                # the total deadline.
                initial=_CLIENT_TIMEOUT_SECONDS / 10,
                # Upper bound on the delay between retries, set to one fifth
                # of the total deadline (twice the initial delay).
                maximum=_CLIENT_TIMEOUT_SECONDS / 5,
            ),
        )

        for row in results:
            campaign_ids.append(row.campaign.id)

        print("The unary call completed before the timeout.")
    except DeadlineExceeded as ex:
        print("The unary call did not complete before the timeout.")
        sys.exit(1)
    except GoogleAdsException as ex:
        print(f"Request with ID '{ex.request_id}' failed with status "
              f"'{ex.error.code().name}' and includes the following errors:")
        for error in ex.failure.errors:
            print(f"\tError with message '{error.message}'.")
            if error.location:
                for field_path_element in error.location.field_path_elements:
                    print(f"\t\tOn field: {field_path_element.field_name}")
        sys.exit(1)

    print(f"Total # of campaign IDs retrieved: {len(campaign_ids)}")
Example #5
    def delete_file(self, filename, mode, partitions=None, not_found_ok=False):
        """Deletes file from path `<bucket_name>/<mode>/<dataset_id>/<table_id>/<partitions>/<filename>`.

        Args:
            filename (str): Name of the file to be deleted

            mode (str): Which folder to delete from [raw|staging|header|auxiliary_files|architecture|all]

            partitions (str, pathlib.PosixPath, or dict): Optional.
                Hive structured partition as a string or dict

                * str : `<key>=<value>/<key2>=<value2>`
                * dict: `dict(key=value, key2=value2)`

            not_found_ok (bool): Optional.
                If True, do not raise an error when the file is not found.
        """

        self._check_mode(mode)

        mode = ([
            "raw", "staging", "header", "auxiliary_files", "architecture"
        ] if mode == "all" else [mode])
        # define retry policy for google cloud storage exceptions

        for m in mode:

            blob = self.bucket.blob(
                self._build_blob_name(filename, m, partitions))

            # When the blob is missing and errors are not tolerated, calling
            # delete() lets the NotFound exception surface.
            if blob.exists() or not not_found_ok:
                blob.delete(retry=Retry(predicate=_is_retryable))
            else:
                return
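_is_retryable is defined elsewhere in that project. A plausible reconstruction, offered purely as an assumption and not the original helper, would build the predicate from the transient Cloud Storage error types:

from google.api_core import exceptions
from google.api_core.retry import if_exception_type

# Hypothetical predicate: retry only on transient server-side errors.
_is_retryable = if_exception_type(
    exceptions.TooManyRequests,      # 429
    exceptions.InternalServerError,  # 500
    exceptions.BadGateway,           # 502
    exceptions.ServiceUnavailable,   # 503
)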
Example #6
    def test_commit_w_retry_timeout(self):
        from google.api_core.retry import Retry

        retry = Retry(predicate=object())
        timeout = 123.0

        self._commit_helper(retry=retry, timeout=timeout)
Example #7
def get_transfer_job_with_retries(
        project_id: str, job_name: str, max_retry_duration: float):
    """
    Check the latest transfer operation associated with a transfer job with
    retries.
    """

    client = storage_transfer.StorageTransferServiceClient()

    # The ID of the Google Cloud Platform Project that owns the job
    # project_id = 'my-project-id'

    # Storage Transfer Service job name
    # job_name = 'transferJobs/1234567890'

    # The maximum amount of time to delay in seconds
    # max_retry_duration = 60

    transfer_job = client.get_transfer_job({
        'project_id': project_id,
        'job_name': job_name,
    },
        retry=Retry(maximum=max_retry_duration)
    )

    print(f"Fetched transfer job: {transfer_job.name} "
          f"with a max retry duration of {max_retry_duration}s")
Example #8
    def tearDown(self):
        dataset = self.client.dataset(self.test_dataset_id)
        self.client.delete_table(self.source_table_ref)

        self.client.delete_table(self.destination_table_ref)

        self.client.delete_dataset(dataset, retry=Retry())
Example #9
def get_retry():
    retry_kwargs = {}
    if getattr(settings, 'GTASK_ON_ERROR', None):
        retry_kwargs.update({'on_error': settings.GTASK_ON_ERROR})
    if getattr(settings, 'GTASK_RETRY_DEADLINE', None):
        retry_kwargs.update({'deadline': settings.GTASK_RETRY_DEADLINE})
    # Fall back to a 10 second deadline unless GTASK_RETRY_DEADLINE supplied
    # one above; passing deadline twice would raise a TypeError.
    retry_kwargs.setdefault('deadline', 10)
    retry = Retry(**retry_kwargs)
    return retry
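A sketch of how the policy returned by get_retry() might be applied; the Cloud Tasks client, queue path and payload below are placeholders rather than part of the original snippet:

from google.cloud import tasks_v2

client = tasks_v2.CloudTasksClient()
parent = client.queue_path("my-project", "us-central1", "my-queue")
task = {"http_request": {"url": "https://example.com/handler"}}
# GAPIC-generated methods accept an optional retry keyword argument.
client.create_task(parent=parent, task=task, retry=get_retry())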
Example #10
def get_client(config: dict) -> PublisherClient:
    """Create a pubsub client."""
    # Initialize PubSub client
    timeout = config.get("PUBLISH_TIMEOUT_SECONDS", None)
    client = PublisherClient()
    client.api.publish = partial(
        client.api.publish,
        retry=Retry(TRANSIENT_ERRORS, deadline=timeout),
        timeout=timeout,
    )
    client._batch_class = AsyncioBatch
    return client
Example #11
def upload_png_files(logger, num_workers, filepaths_to_upload, bucket, stats):
    google_retry = Retry(deadline=480, maximum=240)

    filepaths_to_upload_queue = Queue()
    for filepath in filepaths_to_upload:
        blob_name: str = os.path.join("png_image_files", filepath.rsplit('/')[-1])
        filepaths_to_upload_queue.put((blob_name, filepath))

    def on_google_retry_error(ex: Exception):
        logger.error("Exception when uploading blob to google cloud.")
        logger.exception(ex)

    def google_cloud_uploader():
        start = time.time()
        while True:
            blob_name, filepath = filepaths_to_upload_queue.get(timeout=5)
            blob = bucket.blob(blob_name)
            content_type = "image/png"
            try:
                # Wrap the upload in the retry policy and invoke the wrapped
                # callable; calling upload_from_filename directly here would
                # run the upload once without any retry.
                google_retry(
                    lambda: blob.upload_from_filename(
                        filepath, content_type=content_type),
                    on_error=on_google_retry_error,
                )()
            except Exception as ex:
                logger.error(f"Uncaught exception when uploading blob to google cloud.")
                logger.exception(ex)
                filepaths_to_upload_queue.put((blob.name, filepath))
                raise ex
            stats['num_files_uploaded'] += 1

            if stats['num_files_uploaded'] > stats['checkpoint']:
                elapsed = (time.time() - start) / 60
                logger.info(
                    f"Uploaded {stats['num_files_uploaded']} files in {elapsed} minutes, {stats['num_files_uploaded'] / elapsed} files per minute.")
                stats['checkpoint'] += 1000

    with ThreadPoolExecutor(max_workers=num_workers + 1) as executor:
        tasks: List[Future] = []
        for x in range(num_workers):
            tasks.append(executor.submit(google_cloud_uploader))
        logger.info(f"Started {len(tasks)} worker tasks.")

        logger.info("Starting traverse_directory")
        for task in as_completed(tasks):
            if task.exception() is not None:
                if type(task.exception()) == Empty:
                    logger.info("Child thread completed")
                else:
                    logger.error("Child thread failed")
                    logger.exception(task.exception())

    logger.info("Ending upload.")
Example #12
def init_app(app: Sanic) -> Tuple[PublisherClient, SQLiteAckQueue]:
    """Initialize Sanic app with url rules."""
    # Initialize PubSub client
    timeout = app.config.get("PUBLISH_TIMEOUT_SECONDS", None)
    client = PublisherClient()
    client.api.publish = partial(
        client.api.publish,
        retry=Retry(TRANSIENT_ERRORS, deadline=timeout),
        timeout=timeout,
    )
    client._batch_class = AsyncioBatch
    # Use a SQLiteAckQueue because:
    # * we use acks to ensure messages only removed on success
    # * persist-queue's SQLite*Queue is faster than its Queue
    # * SQLite provides thread-safe and process-safe access
    queue_config = {
        key[6:].lower(): value
        for key, value in app.config.items()
        if key.startswith("QUEUE_")
    }
    q = SQLiteAckQueue(**queue_config)
    # get metadata_headers config
    metadata_headers = app.config["METADATA_HEADERS"]
    # validate attribute keys
    for attribute in metadata_headers.values():
        if len(attribute.encode("utf8")) > 256:
            # https://cloud.google.com/pubsub/quotas#resource_limits
            raise ValueError("Metadata attribute exceeds key size limit of 256 bytes")
    # generate one view_func per topic
    handlers = {
        route.topic: partial(
            submit,
            client=client,
            q=q,
            topic=route.topic,
            metadata_headers=metadata_headers,
        )
        for route in app.config["ROUTE_TABLE"]
    }
    # add routes for ROUTE_TABLE
    for route in app.config["ROUTE_TABLE"]:
        app.add_route(
            handler=handlers[route.topic],
            uri=route.uri,
            methods=[method.upper() for method in route.methods],
            # required because handler.__name__ does not exist
            # must be a unique name for each handler
            name="submit_" + route.topic,
        )
    return client, q
Example #13
def test_stream_w_retry_timeout(query_class):
    from google.api_core.retry import Retry

    retry = Retry(predicate=object())
    timeout = 123.0
    collection = _make_collection_reference("collection")
    stream_response = collection.stream(retry=retry, timeout=timeout)

    query_class.assert_called_once_with(collection)
    query_instance = query_class.return_value
    assert stream_response is query_instance.stream.return_value
    query_instance.stream.assert_called_once_with(
        transaction=None, retry=retry, timeout=timeout,
    )
Example #14
    def test_get_w_retry_timeout(self, query_class):
        from google.api_core.retry import Retry

        retry = Retry(predicate=object())
        timeout = 123.0
        collection = self._make_one("collection")
        get_response = collection.get(retry=retry, timeout=timeout)

        query_class.assert_called_once_with(collection)
        query_instance = query_class.return_value

        self.assertIs(get_response, query_instance.get.return_value)
        query_instance.get.assert_called_once_with(
            transaction=None, retry=retry, timeout=timeout,
        )
Example #15
async def test_asynccollectionreference_get_w_retry_timeout(query_class):
    from google.api_core.retry import Retry

    retry = Retry(predicate=object())
    timeout = 123.0
    collection = _make_async_collection_reference("collection")
    get_response = await collection.get(retry=retry, timeout=timeout)

    query_class.assert_called_once_with(collection)
    query_instance = query_class.return_value

    assert get_response is query_instance.get.return_value
    query_instance.get.assert_called_once_with(
        transaction=None, retry=retry, timeout=timeout,
    )
Example #16
def analyze(nlp_client, comment):
    """Run a sentiment analysis request on text within a passed comment."""
    document = types.Document(content=comment,
                              type=enums.Document.Type.PLAIN_TEXT)
    # Send the sentiment back to page processing. We wrap this in a
    # try/except for language errors: the library doesn't gracefully handle
    # languages that can't be sent to the Natural Language API, and the
    # fastest way to deal with this is to swallow the error and move on.
    try:
        # The Retry() object handles transient failures with the default
        # retry policy.
        annotations = nlp_client.analyze_sentiment(document=document,
                                                   retry=Retry())
    except Exception:
        return 0
    return annotations
Example #17
    def assert_flushed(self):
        """Wait for flush then assert queue empty and message delivered."""
        retry = Retry(
            lambda e: isinstance(e, AssertionError),
            initial=PUBLISH_TIMEOUT_SECONDS,
            multiplier=1,
            # wait up to two flush cycles, with one second extra overhead per flush
            deadline=(FLUSH_SLEEP_SECONDS + PUBLISH_TIMEOUT_SECONDS) * 2,
        )
        if self.uses_cluster:
            # detect flush from delivered
            retry(self.assert_delivered)()
        else:
            # detect flush from heartbeat and validate delivered
            retry(self.assert_queue_empty)()
            self.assert_delivered()
Example #18
    def _bulk_write(self,
                    rows,
                    annotation_ids=None,
                    operation_id=None,
                    slow_retry=True):
        """ Writes a list of mutated rows in bulk

        WARNING: If <rows> contains the same row (same row_key) and column
        key twice, only the last one is effectively written to BigTable
        (even when the mutations were applied to different columns)
        --> no versioning!

        :param rows: list
            list of mutated rows
        :param annotation_ids: list of uint64
        :param operation_id: str or None
            operation_id (or other unique id) that *was* used to lock the root
            the bulk write is only executed if the root is still locked with
            the same id.
        :param slow_retry: bool
        """
        if slow_retry:
            initial = 5
        else:
            initial = 1

        retry_policy = Retry(predicate=if_exception_type(
            (Aborted, DeadlineExceeded, ServiceUnavailable)),
                             initial=initial,
                             maximum=15.0,
                             multiplier=2.0,
                             deadline=LOCK_EXPIRED_TIME_DELTA.seconds)

        if annotation_ids is not None and operation_id is not None:
            if isinstance(annotation_ids, int):
                annotation_ids = [annotation_ids]

            if not self._check_and_renew_annotation_locks(
                    annotation_ids, operation_id):
                return False

        status = self.table.mutate_rows(rows, retry=retry_policy)

        if not any(status):
            raise Exception(status)

        return True
Example #19
async def test_asynccollectionreference_stream_w_retry_timeout(query_class):
    from google.api_core.retry import Retry

    retry = Retry(predicate=object())
    timeout = 123.0
    query_class.return_value.stream.return_value = AsyncIter(range(3))

    collection = _make_async_collection_reference("collection")
    stream_response = collection.stream(retry=retry, timeout=timeout)

    async for _ in stream_response:
        pass

    query_class.assert_called_once_with(collection)
    query_instance = query_class.return_value
    query_instance.stream.assert_called_once_with(
        transaction=None, retry=retry, timeout=timeout,
    )
Example #20
        def drop_rows(self, fq_table_name):
            """
            Helper function to drop table rows and assert the drop finished.

            Raises an assertion error if the table records are not dropped.
            Drops all rows.

            :param fq_table_name: a fully qualified table name to drop all
                records for
            """
            query = f"delete from {fq_table_name} where true"

            query_retry = Retry()
            response = self.client.query(query, retry=query_retry, timeout=30)

            # wait for the query job to finish and confirm it succeeded
            self.assertIsNotNone(response.result())
            self.assertIsNone(response.exception())
Example #21
def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query):
    """
    Test job_retry

    See: https://github.com/googleapis/python-bigquery/issues/539
    """
    from google.api_core import exceptions
    from google.api_core.retry import if_exception_type, Retry

    table_name = f"{dataset_id}.t539"

    # Without a custom retry, we fail:
    with pytest.raises(google.api_core.exceptions.NotFound):
        bigquery_client.query(f"select count(*) from {table_name}").result()

    retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound))

    job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {}
    job = bigquery_client.query(f"select count(*) from {table_name}",
                                **job_retry)
    job_id = job.job_id

    # We already know that the job failed, but we are not supposed to find
    # out until we call result(), which is where the retry happens.
    assert job.done()
    assert job.exception() is not None

    @thread
    def create_table():
        time.sleep(1)  # Give the first retry attempt time to fail.
        with contextlib.closing(google.cloud.bigquery.Client()) as client:
            client.query(f"create table {table_name} (id int64)").result()

    job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound)
    [[count]] = list(job.result(**job_retry))
    assert count == 0

    # The job was retried, and thus got a new job id
    assert job.job_id != job_id

    # Make sure we don't leave a thread behind:
    create_table.join()
    bigquery_client.query(f"drop table {table_name}").result()
Example #22
    def bulk_write(self,
                   rows: Sequence[bigtable.row.DirectRow],
                   slow_retry: bool = True,
                   block_size: int = 2000) -> bool:
        """ Writes a list of mutated rows in bulk

        WARNING: If <rows> contains the same row (same row_key) and column
        key twice, only the last one is effectively written to BigTable
        (even when the mutations were applied to different columns)
        --> no versioning!

        :param rows: list
            list of mutated rows
        :param slow_retry: bool
        :param block_size: int
        """
        if slow_retry:
            initial = 5
        else:
            initial = 1

        retry_policy = Retry(predicate=if_exception_type(
            (Aborted, DeadlineExceeded, ServiceUnavailable)),
                             initial=initial,
                             maximum=15.0,
                             multiplier=2.0,
                             deadline=20)

        for i_row in range(0, len(rows), block_size):
            status = self.table.mutate_rows(rows[i_row:i_row + block_size],
                                            retry=retry_policy)

            if not all(status):
                raise Exception(status)

        return True
Example #23
# [START howto_operator_vision_detect_image_param]
DETECT_IMAGE = {"source": {"image_uri": GCP_VISION_ANNOTATE_IMAGE_URL}}
# [END howto_operator_vision_detect_image_param]

with models.DAG('example_gcp_vision_autogenerated_id',
                default_args=default_args,
                schedule_interval=None) as dag_autogenerated_id:
    # ################################## #
    # ### Autogenerated IDs examples ### #
    # ################################## #

    # [START howto_operator_vision_product_set_create]
    product_set_create = CloudVisionProductSetCreateOperator(
        location=GCP_VISION_LOCATION,
        product_set=product_set,
        retry=Retry(maximum=10.0),
        timeout=5,
        task_id='product_set_create',
    )
    # [END howto_operator_vision_product_set_create]

    # [START howto_operator_vision_product_set_get]
    product_set_get = CloudVisionProductSetGetOperator(
        location=GCP_VISION_LOCATION,
        product_set_id="{{ task_instance.xcom_pull('product_set_create') }}",
        task_id='product_set_get',
    )
    # [END howto_operator_vision_product_set_get]

    # [START howto_operator_vision_product_set_update]
    product_set_update = CloudVisionProductSetUpdateOperator(
Example #24
    async def test_list_documents_w_retry_timeout(self):
        from google.api_core.retry import Retry

        retry = Retry(predicate=object())
        timeout = 123.0
        await self._list_documents_helper(retry=retry, timeout=timeout)
Example #25
    CloudMemorystoreFailoverInstanceOperator,
    CloudMemorystoreGetInstanceOperator,
    CloudMemorystoreImportOperator,
    CloudMemorystoreListInstancesOperator,
    CloudMemorystoreScaleInstanceOperator,
    CloudMemorystoreUpdateInstanceOperator,
)

TEST_GCP_CONN_ID = "test-gcp-conn-id"
TEST_TASK_ID = "task-id"
TEST_LOCATION = "test-location"
TEST_INSTANCE_ID = "test-instance-id"
TEST_INSTANCE = Instance(name="instance")
TEST_INSTANCE_NAME = "test-instance-name"
TEST_PROJECT_ID = "test-project-id"
TEST_RETRY = Retry()  # type: Retry
TEST_TIMEOUT = 10  # type: float
TEST_INSTANCE_SIZE = 4  # type: int
TEST_METADATA = [("KEY", "VALUE")]  # type: Sequence[Tuple[str, str]]
TEST_OUTPUT_CONFIG = {
    "gcs_destination": {
        "uri": "gs://test-bucket/file.rdb"
    }
}  # type: Dict
TEST_DATA_PROTECTION_MODE = FailoverInstanceRequest.DataProtectionMode.LIMITED_DATA_LOSS
TEST_INPUT_CONFIG = {
    "gcs_source": {
        "uri": "gs://test-bucket/file.rdb"
    }
}  # type: Dict
TEST_PAGE_SIZE = 100  # type: int
Example #26
def test_transaction_get_w_query_w_retry_timeout():
    from google.api_core.retry import Retry

    retry = Retry(predicate=object())
    timeout = 123.0
    _transaction_get_w_query_helper(retry=retry, timeout=timeout)
Example #27
# Maximum number of mutations in bulk (MutateRowsRequest message):
# (https://cloud.google.com/bigtable/docs/reference/data/rpc/
#  google.bigtable.v2#google.bigtable.v2.MutateRowRequest)
_MAX_BULK_MUTATIONS = 100000
VIEW_NAME_ONLY = enums.Table.View.NAME_ONLY


class _BigtableRetryableError(Exception):
    """Retry-able error expected by the default retry strategy."""


DEFAULT_RETRY = Retry(
    predicate=if_exception_type(_BigtableRetryableError),
    initial=1.0,
    maximum=15.0,
    multiplier=2.0,
    deadline=120.0,  # 2 minutes
)
"""The default retry strategy to be used on retry-able errors.

Used by :meth:`~google.cloud.bigtable.table.Table.mutate_rows`.
"""


class TableMismatchError(ValueError):
    """Row from another table."""


class TooManyMutationsError(ValueError):
    """The number of mutations for bulk request is too big."""
Example #28
def _transient_string_in_exception_message(exc):
    # type: (Exception) -> bool
    """Determines whether an exception's message contains a common message for transient errors.

    The exception's message containing one of these substrings is sufficient to determine that it is
    transient, but there can be transient exceptions whose messages do not contain these substrings.
    """
    return ('The job encountered an internal error during execution'
            in str(exc)
            or 'Retrying the job may solve the problem' in str(exc))


# Retry object for errors encountered in making API calls (executing jobs, etc.)
DEFAULT_RETRY_FOR_API_CALLS = Retry(
    # The predicate takes an exception and returns whether it is transient.
    predicate=lambda exc: (bq_retry.DEFAULT_RETRY._predicate(exc) or
                           _transient_string_in_exception_message(exc)),
    deadline=DEFAULT_TIMEOUT_SEC)

# Retry object for errors encountered while polling jobs in progress.
# See https://github.com/googleapis/google-cloud-python/issues/6301
DEFAULT_RETRY_FOR_ASYNC_JOBS = Retry(
    # The predicate takes an exception and returns whether it is transient.
    predicate=lambda exc: (polling.DEFAULT_RETRY._predicate(exc) or
                           _transient_string_in_exception_message(exc)),
    deadline=DEFAULT_TIMEOUT_SEC)


class BigqueryBaseClient(object):
    """Stores credentials and pointers to a BigQuery project.
Example #29
    async def test_get_partitions_w_retry_timeout(self):
        from google.api_core.retry import Retry

        retry = Retry(predicate=object())
        timeout = 123.0
        await self._get_partitions_helper(retry=retry, timeout=timeout)
Example #30
async def test_asynccollectionreference_list_documents_w_retry_timeout():
    from google.api_core.retry import Retry

    retry = Retry(predicate=object())
    timeout = 123.0
    await _list_documents_helper(retry=retry, timeout=timeout)