def __init__(self, session_id=None, language_code='en'):
    """Store session settings and pre-compute the session and KB paths.

    Args:
        session_id: Identifier for this session. When omitted (``None``) a
            fresh ``uuid4()`` is generated for this instance.
        language_code: Language code stored on the instance; defaults to
            ``'en'``.
    """
    # A ``uuid4()`` default in the signature is evaluated only once, when the
    # function is defined, so every instance created without an explicit id
    # would silently share the same session. Generate it per call instead.
    if session_id is None:
        session_id = uuid4()

    self.session_id = session_id
    self._session = session.session_path(project_id, session_id)
    self._kb = kb.project_path(project_id)
    self.language_code = language_code
    self.min_confidence = 0.8
    # Keyword arguments kept together so they can be passed along as a unit.
    self._retry = {'retry': Retry(), 'timeout': 10}
def get_table_with_retry(client, ref):
    """Fetch a table, trying again for as long as the read times out.

    Args:
        client: Object exposing ``get_table(ref, retry=..., timeout=...)``.
        ref: Table reference forwarded to ``client.get_table``.

    Returns:
        The value returned by the first ``client.get_table`` call that does
        not raise ``ReadTimeout``.
    """
    while True:
        try:
            table = client.get_table(ref, retry=Retry(deadline=60), timeout=5)
        except ReadTimeout as timeout_error:
            # Report the timeout and go around the loop again.
            print(timeout_error)
            print('Retrying...')
        else:
            return table
def test_bulkwritebatch_write_w_retry_timeout():
    """Run the write helper with an explicit retry object and timeout."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    _write_helper(**call_options)
def _make_unary_call(client, customer_id):
    """Issue a unary search request with custom retry settings.

    Prints whether the call finished before the timeout and how many campaign
    IDs were retrieved. Exits the process with status 1 when the deadline is
    exceeded or a ``GoogleAdsException`` is raised.

    Args:
        client: An initialized GoogleAds client.
        customer_id: The Google Ads customer ID.
    """
    ga_service = client.get_service("GoogleAdsService")
    campaign_ids = []

    try:
        search_request = client.get_type("SearchGoogleAdsRequest")
        search_request.customer_id = customer_id
        search_request.query = _QUERY

        # As of v5, any unary call is retryable and has default retry
        # settings. Complete information about these settings can be found
        # here: https://googleapis.dev/python/google-api-core/latest/retry.html
        #
        # For this particular call, the default retry settings can be found
        # in the following file:
        # https://github.com/googleads/google-ads-python/blob/master/google/ads/google_ads/v6/services/google_ads_service_client_config.py
        #
        # The optional ``retry`` argument of a unary call overrides those
        # defaults with the values given here:
        #   deadline - maximum accumulative timeout of the call, covering
        #              all tries.
        #   initial  - timeout used for the first try: one tenth of the
        #              maximum accumulative timeout.
        #   maximum  - largest timeout usable for any single try: one fifth
        #              of the maximum accumulative timeout (twice the value
        #              used for the first try).
        custom_retry = Retry(
            deadline=_CLIENT_TIMEOUT_SECONDS,
            initial=_CLIENT_TIMEOUT_SECONDS / 10,
            maximum=_CLIENT_TIMEOUT_SECONDS / 5,
        )
        results = ga_service.search(request=search_request, retry=custom_retry)

        campaign_ids.extend(row.campaign.id for row in results)

        print("The unary call completed before the timeout.")
    except DeadlineExceeded:
        print("The unary call did not complete before the timeout.")
        sys.exit(1)
    except GoogleAdsException as ex:
        print(f"Request with ID '{ex.request_id}' failed with status "
              f"'{ex.error.code().name}' and includes the following errors:")
        for error in ex.failure.errors:
            print(f"\tError with message '{error.message}'.")
            if not error.location:
                continue
            for field_path_element in error.location.field_path_elements:
                print(f"\t\tOn field: {field_path_element.field_name}")
        sys.exit(1)

    print(f"Total # of campaign IDs retrieved: {len(campaign_ids)}")
def delete_file(self, filename, mode, partitions=None, not_found_ok=False):
    """Deletes file from path `<bucket_name>/<mode>/<dataset_id>/<table_id>/<partitions>/<filename>`.

    Args:
        filename (str): Name of the file to be deleted

        mode (str): Folder of which dataset to update [raw|staging|header|auxiliary_files|architecture|all]

        partitions (str, pathlib.PosixPath, or dict): Optional.
            Hive structured partition as a string or dict

            * str : `<key>=<value>/<key2>=<value2>`
            * dict: `dict(key=value, key2=value2)`

        not_found_ok (bool): Optional.
            When True, a folder where the file does not exist is skipped.
            When False, the delete is attempted whether or not the file
            exists.
    """
    self._check_mode(mode)

    modes = (
        ["raw", "staging", "header", "auxiliary_files", "architecture"]
        if mode == "all"
        else [mode]
    )

    for m in modes:
        blob = self.bucket.blob(self._build_blob_name(filename, m, partitions))

        # Skip only this folder when the file is absent and that is
        # acceptable. Returning here instead would stop at the first folder
        # without the file and leave copies behind in every later folder
        # when mode == "all".
        if not_found_ok and not blob.exists():
            continue

        # Retry the delete according to the `_is_retryable` predicate.
        blob.delete(retry=Retry(predicate=_is_retryable))
def test_commit_w_retry_timeout(self):
    """Run the commit helper with an explicit retry object and timeout."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    self._commit_helper(**call_options)
def get_transfer_job_with_retries(
        project_id: str, job_name: str, max_retry_duration: float):
    """
    Fetch a transfer job using a custom retry policy and print its name.
    """

    # Example argument values:
    #
    #   project_id = 'my-project-id'
    #       The ID of the Google Cloud Platform Project that owns the job
    #
    #   job_name = 'transferJobs/1234567890'
    #       Storage Transfer Service job name
    #
    #   max_retry_duration = 60
    #       The maximum amount of time to delay in seconds

    client = storage_transfer.StorageTransferServiceClient()

    request = {
        'project_id': project_id,
        'job_name': job_name,
    }
    retry_policy = Retry(maximum=max_retry_duration)
    transfer_job = client.get_transfer_job(request, retry=retry_policy)

    print(f"Fetched transfer job: {transfer_job.name} "
          f"with a max retry duration of {max_retry_duration}s")
def tearDown(self):
    """Delete the source and destination tables, then the test dataset."""
    dataset_ref = self.client.dataset(self.test_dataset_id)

    self.client.delete_table(self.source_table_ref)
    self.client.delete_table(self.destination_table_ref)

    # Only the dataset deletion is given an explicit retry object.
    default_retry = Retry()
    self.client.delete_dataset(dataset_ref, retry=default_retry)
def get_retry():
    """Build the ``Retry`` object from optional ``settings`` overrides.

    Reads two optional attributes from ``settings``:

    * ``GTASK_ON_ERROR`` - passed as ``on_error`` when set to a truthy value.
    * ``GTASK_RETRY_DEADLINE`` - replaces the default deadline of 10 when set
      to a truthy value.

    Returns:
        A ``Retry`` instance configured with the resulting keyword arguments.
    """
    # Start from the default deadline and let the setting override it.
    # Passing ``deadline=10`` alongside ``**retry_kwargs`` raised
    # "TypeError: got multiple values for keyword argument 'deadline'"
    # whenever GTASK_RETRY_DEADLINE was configured.
    retry_kwargs = {'deadline': 10}

    on_error = getattr(settings, 'GTASK_ON_ERROR', None)
    if on_error:
        retry_kwargs['on_error'] = on_error

    deadline = getattr(settings, 'GTASK_RETRY_DEADLINE', None)
    if deadline:
        retry_kwargs['deadline'] = deadline

    return Retry(**retry_kwargs)
def get_client(config: dict) -> PublisherClient:
    """Build a ``PublisherClient`` whose publish call has retry and timeout bound.

    The timeout is read from ``config["PUBLISH_TIMEOUT_SECONDS"]`` (``None``
    when the key is absent) and is used both as the retry deadline and as the
    call timeout.
    """
    publish_timeout = config.get("PUBLISH_TIMEOUT_SECONDS")

    publisher = PublisherClient()
    original_publish = publisher.api.publish
    publish_options = {
        "retry": Retry(TRANSIENT_ERRORS, deadline=publish_timeout),
        "timeout": publish_timeout,
    }
    # Replace the API's publish callable with one that always carries the
    # retry and timeout options.
    publisher.api.publish = partial(original_publish, **publish_options)
    publisher._batch_class = AsyncioBatch
    return publisher
def upload_png_files(logger, num_workers, filepaths_to_upload, bucket, stats):
    """Upload PNG files to ``png_image_files/`` in a bucket using worker threads.

    Every path is queued, then ``num_workers`` threads pull from the queue
    and upload until the queue has been empty for 5 seconds.

    Args:
        logger: Logger used for progress and error reporting.
        num_workers: Number of uploader threads to start.
        filepaths_to_upload: Iterable of local file paths; each one is
            uploaded as ``png_image_files/<last path component>``.
        bucket: Bucket object providing ``blob(name)``.
        stats: Mutable mapping holding the ``num_files_uploaded`` and
            ``checkpoint`` counters; updated in place by the workers.
    """
    google_retry = Retry(deadline=480, maximum=240)

    filepaths_to_upload_queue = Queue()
    for filepath in filepaths_to_upload:
        blob_name: str = os.path.join("png_image_files", filepath.rsplit('/')[-1])
        filepaths_to_upload_queue.put((blob_name, filepath))

    def on_google_retry_error(ex: Exception):
        logger.error("Exception when uploading blob to google cloud.")
        logger.exception(ex)

    def google_cloud_uploader():
        start = time.time()
        while True:
            # Raises queue.Empty once nothing arrives for 5 seconds; that
            # exception is what ends the worker.
            blob_name, filepath = filepaths_to_upload_queue.get(timeout=5)
            blob = bucket.blob(blob_name)
            content_type = "image/png"
            try:
                # Wrap the bound method itself and call the wrapper.
                # Passing `blob.upload_from_filename(...)` to the retry
                # object runs the upload eagerly and wraps its return value,
                # so the upload was never actually retried.
                upload_with_retry = google_retry(
                    blob.upload_from_filename, on_error=on_google_retry_error)
                upload_with_retry(filepath, content_type=content_type)
            except Exception as ex:
                logger.error("Uncaught exception when uploading blob to google cloud.")
                logger.exception(ex)
                # Put the item back so another worker can pick it up.
                filepaths_to_upload_queue.put((blob.name, filepath))
                raise

            # NOTE(review): these counters are shared by all workers and no
            # lock is visible here - confirm that approximate counts are
            # acceptable.
            stats['num_files_uploaded'] += 1
            if stats['num_files_uploaded'] > stats['checkpoint']:
                elapsed = (time.time() - start) / 60
                logger.info(
                    f"Uploaded {stats['num_files_uploaded']} files in {elapsed} minutes, {stats['num_files_uploaded'] / elapsed} files per minute.")
                stats['checkpoint'] += 1000

    with ThreadPoolExecutor(max_workers=num_workers + 1) as executor:
        tasks: List[Future] = []
        for _ in range(num_workers):
            tasks.append(executor.submit(google_cloud_uploader))
        logger.info(f"Started {len(tasks)} worker tasks.")

        logger.info("Starting traverse_directory")

        for task in as_completed(tasks):
            error = task.exception()
            if error is None:
                continue
            if isinstance(error, Empty):
                # The queue drained: normal worker termination.
                logger.info("Child thread completed")
            else:
                logger.error("Child thread failed")
                logger.exception(error)

    logger.info("Ending upload.")
def init_app(app: Sanic) -> Tuple[PublisherClient, SQLiteAckQueue]:
    """Create the publisher client and ack queue, then register submit routes.

    Returns:
        The ``(client, queue)`` pair that the registered handlers share.

    Raises:
        ValueError: If a configured metadata attribute is longer than 256
            bytes once encoded as UTF-8.
    """
    settings = app.config

    # PubSub client whose publish call always carries retry and timeout.
    publish_timeout = settings.get("PUBLISH_TIMEOUT_SECONDS")
    client = PublisherClient()
    client.api.publish = partial(
        client.api.publish,
        retry=Retry(TRANSIENT_ERRORS, deadline=publish_timeout),
        timeout=publish_timeout,
    )
    client._batch_class = AsyncioBatch

    # Use a SQLiteAckQueue because:
    # * we use acks to ensure messages only removed on success
    # * persist-queue's SQLite*Queue is faster than its Queue
    # * SQLite provides thread-safe and process-safe access
    # Its keyword arguments are the QUEUE_* settings with the prefix removed
    # and the remainder lower-cased.
    prefix = "QUEUE_"
    queue_options = {}
    for key, value in settings.items():
        if key.startswith(prefix):
            queue_options[key[len(prefix):].lower()] = value
    q = SQLiteAckQueue(**queue_options)

    metadata_headers = settings["METADATA_HEADERS"]
    for attribute in metadata_headers.values():
        encoded_size = len(attribute.encode("utf8"))
        if encoded_size > 256:
            # https://cloud.google.com/pubsub/quotas#resource_limits
            raise ValueError("Metadata attribute exceeds key size limit of 256 bytes")

    # One view function per topic; routes sharing a topic share the handler.
    handlers = {}
    for route in settings["ROUTE_TABLE"]:
        handlers[route.topic] = partial(
            submit,
            client=client,
            q=q,
            topic=route.topic,
            metadata_headers=metadata_headers,
        )

    for route in settings["ROUTE_TABLE"]:
        http_methods = [method.upper() for method in route.methods]
        app.add_route(
            handler=handlers[route.topic],
            uri=route.uri,
            methods=http_methods,
            # required because handler.__name__ does not exist
            # must be a unique name for each handler
            name="submit_" + route.topic,
        )

    return client, q
def test_stream_w_retry_timeout(query_class):
    """``stream`` builds a query and forwards ``retry`` and ``timeout`` to it."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    collection = _make_collection_reference("collection")

    stream_response = collection.stream(**call_options)

    query_class.assert_called_once_with(collection)
    created_query = query_class.return_value
    assert stream_response is created_query.stream.return_value
    created_query.stream.assert_called_once_with(transaction=None, **call_options)
def test_get_w_retry_timeout(self, query_class):
    """``get`` builds a query and forwards ``retry`` and ``timeout`` to it."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    collection = self._make_one("collection")

    get_response = collection.get(**call_options)

    query_class.assert_called_once_with(collection)
    created_query = query_class.return_value
    self.assertIs(get_response, created_query.get.return_value)
    created_query.get.assert_called_once_with(transaction=None, **call_options)
async def test_asynccollectionreference_get_w_retry_timeout(query_class):
    """Awaiting ``get`` forwards ``retry`` and ``timeout`` to the query."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    collection = _make_async_collection_reference("collection")

    get_response = await collection.get(**call_options)

    query_class.assert_called_once_with(collection)
    created_query = query_class.return_value
    assert get_response is created_query.get.return_value
    created_query.get.assert_called_once_with(transaction=None, **call_options)
def analyze(nlp_client, comment):
    """Run a sentiment analysis request on text within a passed comment.

    Args:
        nlp_client: Client exposing ``analyze_sentiment(document=..., retry=...)``.
        comment: Text to analyze; sent as a plain-text document.

    Returns:
        The annotations returned by ``analyze_sentiment``, or ``0`` when the
        request raises an error.
    """
    document = types.Document(content=comment, type=enums.Document.Type.PLAIN_TEXT)

    # The library doesn't gracefully handle languages that can't be sent into
    # NL, and the fastest way to deal with this is to treat any failure as
    # "no sentiment" and move on. `except Exception` keeps that best-effort
    # behavior while still letting KeyboardInterrupt and SystemExit
    # propagate, which a bare `except:` would have swallowed.
    try:
        # Retry() is passed so that retries are handled in a default way.
        annotations = nlp_client.analyze_sentiment(document=document, retry=Retry())
    except Exception:
        return 0

    return annotations
def assert_flushed(self):
    """Wait for flush then assert queue empty and message delivered."""

    def is_assertion_error(exc):
        # Only failed assertions are worth trying again.
        return isinstance(exc, AssertionError)

    # wait up to two flush cycles, with one second extra overhead per flush
    max_wait = (FLUSH_SLEEP_SECONDS + PUBLISH_TIMEOUT_SECONDS) * 2
    retry = Retry(
        is_assertion_error,
        initial=PUBLISH_TIMEOUT_SECONDS,
        multiplier=1,
        deadline=max_wait,
    )

    if self.uses_cluster:
        # detect flush from delivered
        retry(self.assert_delivered)()
        return

    # detect flush from heartbeat and validate delivered
    retry(self.assert_queue_empty)()
    self.assert_delivered()
def _bulk_write(self, rows, annotation_ids=None, operation_id=None,
                slow_retry=True):
    """ Sends a list of mutated rows to the table in one bulk call

    WARNING: If <rows> contains the same row (same row_key) and column
    key two times only the last one is effectively written to the BigTable
    (even when the mutations were applied to different columns)
    --> no versioning!

    When both <annotation_ids> and <operation_id> are given, the annotation
    locks are checked and renewed first; if that fails nothing is written
    and False is returned.

    :param rows: list
        list of mutated rows
    :param annotation_ids: list of uint64
        a single int is also accepted and treated as a one-element list
    :param operation_id: str or None
        operation_id (or other unique id) that *was* used to lock the root
        the bulk write is only executed if the root is still locked with
        the same id.
    :param slow_retry: bool
        selects the retry's `initial` value: 5 when True, 1 otherwise
    :return: bool
        True after the write; raises Exception(status) when no entry of the
        returned status list is truthy
    """
    initial = 5 if slow_retry else 1

    retryable = if_exception_type(
        (Aborted, DeadlineExceeded, ServiceUnavailable))
    retry_policy = Retry(
        predicate=retryable,
        initial=initial,
        maximum=15.0,
        multiplier=2.0,
        deadline=LOCK_EXPIRED_TIME_DELTA.seconds)

    lock_check_requested = (annotation_ids is not None
                            and operation_id is not None)
    if lock_check_requested:
        if isinstance(annotation_ids, int):
            annotation_ids = [annotation_ids]

        locks_renewed = self._check_and_renew_annotation_locks(
            annotation_ids, operation_id)
        if not locks_renewed:
            return False

    status = self.table.mutate_rows(rows, retry=retry_policy)

    if any(status):
        return True

    raise Exception(status)
async def test_asynccollectionreference_stream_w_retry_timeout(query_class):
    """Consuming ``stream`` forwards ``retry`` and ``timeout`` to the query."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    created_query = query_class.return_value
    created_query.stream.return_value = AsyncIter(range(3))

    collection = _make_async_collection_reference("collection")
    stream_response = collection.stream(**call_options)

    # Drain the async iterator so the underlying query is actually used.
    async for _ in stream_response:
        pass

    query_class.assert_called_once_with(collection)
    created_query.stream.assert_called_once_with(transaction=None, **call_options)
def drop_rows(self, fq_table_name):
    """
    Delete every row of a table and assert that the delete job finished.

    Fails with an assertion error when the job yields no result or reports
    an exception.

    :param fq_table_name: a fully qualified table name to drop all records for
    """
    delete_all_sql = f"delete from {fq_table_name} where true"
    query_retry = Retry()
    job = self.client.query(delete_all_sql, retry=query_retry, timeout=30)

    # result() starts the job and waits for it to complete
    outcome = job.result()
    self.assertIsNotNone(outcome)
    self.assertIsNone(job.exception())
def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query):
    """
    Test job_retry

    Queries a table that does not exist yet, creates it from a background
    thread, and checks that retrying on NotFound lets the query succeed under
    a new job id. ``job_retry_on_query`` selects whether the retry is handed
    to ``query()`` (truthy) or to ``result()`` (falsy).

    See: https://github.com/googleapis/python-bigquery/issues/539
    """
    from google.api_core import exceptions
    from google.api_core.retry import if_exception_type, Retry

    table_name = f"{dataset_id}.t539"

    # Without a custom retry, we fail:
    with pytest.raises(google.api_core.exceptions.NotFound):
        bigquery_client.query(f"select count(*) from {table_name}").result()

    # Retry policy that treats NotFound as retryable.
    retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound))

    # Pass the retry to query() only when job_retry_on_query is set.
    job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {}
    job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry)
    job_id = job.job_id

    # We can already know that the job failed, but we're not supposed
    # to find out until we call result, which is where retry happened
    assert job.done()
    assert job.exception() is not None

    # NOTE(review): `thread` is defined outside this block; presumably it
    # starts `create_table` in a background thread right away - confirm.
    @thread
    def create_table():
        time.sleep(1)  # Give the first retry attempt time to fail.
        with contextlib.closing(google.cloud.bigquery.Client()) as client:
            client.query(f"create table {table_name} (id int64)").result()

    # Otherwise pass the retry to result() instead.
    job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound)
    # The result is expected to be exactly one row with one column.
    [[count]] = list(job.result(**job_retry))
    assert count == 0

    # The job was retried, and thus got a new job id
    assert job.job_id != job_id

    # Make sure we don't leave a thread behind:
    create_table.join()
    # Clean up the table created by the background thread.
    bigquery_client.query(f"drop table {table_name}").result()
def bulk_write(self, rows: Sequence[bigtable.row.DirectRow],
               slow_retry: bool = True,
               block_size: int = 2000) -> bool:
    """ Sends mutated rows to the table in consecutive blocks

    WARNING: If <rows> contains the same row (same row_key) and column
    key two times only the last one is effectively written to the BigTable
    (even when the mutations were applied to different columns)
    --> no versioning!

    :param rows: list
        list of mutated rows
    :param slow_retry: bool
        selects the retry's `initial` value: 5 when True, 1 otherwise
    :param block_size: int
        number of rows handed to each `mutate_rows` call
    :return: bool
        True once every block was written; raises Exception(status) for the
        first block whose status list contains a falsy entry
    """
    retryable = if_exception_type(
        (Aborted, DeadlineExceeded, ServiceUnavailable))
    retry_policy = Retry(
        predicate=retryable,
        initial=5 if slow_retry else 1,
        maximum=15.0,
        multiplier=2.0,
        deadline=20)

    block_starts = range(0, len(rows), block_size)
    for block_start in block_starts:
        block = rows[block_start:block_start + block_size]
        status = self.table.mutate_rows(block, retry=retry_policy)

        if all(status):
            continue
        raise Exception(status)

    return True
# [START howto_operator_vision_detect_image_param] DETECT_IMAGE = {"source": {"image_uri": GCP_VISION_ANNOTATE_IMAGE_URL}} # [END howto_operator_vision_detect_image_param] with models.DAG('example_gcp_vision_autogenerated_id', default_args=default_args, schedule_interval=None) as dag_autogenerated_id: # ################################## # # ### Autogenerated IDs examples ### # # ################################## # # [START howto_operator_vision_product_set_create] product_set_create = CloudVisionProductSetCreateOperator( location=GCP_VISION_LOCATION, product_set=product_set, retry=Retry(maximum=10.0), timeout=5, task_id='product_set_create', ) # [END howto_operator_vision_product_set_create] # [START howto_operator_vision_product_set_get] product_set_get = CloudVisionProductSetGetOperator( location=GCP_VISION_LOCATION, product_set_id="{{ task_instance.xcom_pull('product_set_create') }}", task_id='product_set_get', ) # [END howto_operator_vision_product_set_get] # [START howto_operator_vision_product_set_update] product_set_update = CloudVisionProductSetUpdateOperator(
async def test_list_documents_w_retry_timeout(self):
    """Run the list-documents helper with an explicit retry and timeout."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    await self._list_documents_helper(**call_options)
CloudMemorystoreFailoverInstanceOperator, CloudMemorystoreGetInstanceOperator, CloudMemorystoreImportOperator, CloudMemorystoreListInstancesOperator, CloudMemorystoreScaleInstanceOperator, CloudMemorystoreUpdateInstanceOperator, ) TEST_GCP_CONN_ID = "test-gcp-conn-id" TEST_TASK_ID = "task-id" TEST_LOCATION = "test-location" TEST_INSTANCE_ID = "test-instance-id" TEST_INSTANCE = Instance(name="instance") TEST_INSTANCE_NAME = "test-instance-name" TEST_PROJECT_ID = "test-project-id" TEST_RETRY = Retry() # type: Retry TEST_TIMEOUT = 10 # type: float TEST_INSTANCE_SIZE = 4 # type: int TEST_METADATA = [("KEY", "VALUE")] # type: Sequence[Tuple[str, str]] TEST_OUTPUT_CONFIG = { "gcs_destination": { "uri": "gs://test-bucket/file.rdb" } } # type: Dict TEST_DATA_PROTECTION_MODE = FailoverInstanceRequest.DataProtectionMode.LIMITED_DATA_LOSS TEST_INPUT_CONFIG = { "gcs_source": { "uri": "gs://test-bucket/file.rdb" } } # type: Dict TEST_PAGE_SIZE = 100 # type: int
def test_transaction_get_w_query_w_retry_timeout():
    """Run the get-with-query helper with an explicit retry and timeout."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    _transaction_get_w_query_helper(**call_options)
# Maximum number of mutations in bulk (MutateRowsRequest message): # (https://cloud.google.com/bigtable/docs/reference/data/rpc/ # google.bigtable.v2#google.bigtable.v2.MutateRowRequest) _MAX_BULK_MUTATIONS = 100000 VIEW_NAME_ONLY = enums.Table.View.NAME_ONLY class _BigtableRetryableError(Exception): """Retry-able error expected by the default retry strategy.""" DEFAULT_RETRY = Retry( predicate=if_exception_type(_BigtableRetryableError), initial=1.0, maximum=15.0, multiplier=2.0, deadline=120.0, # 2 minutes ) """The default retry strategy to be used on retry-able errors. Used by :meth:`~google.cloud.bigtable.table.Table.mutate_rows`. """ class TableMismatchError(ValueError): """Row from another table.""" class TooManyMutationsError(ValueError): """The number of mutations for bulk request is too big."""
def _transient_string_in_exception_message(exc): # type: (Exception) -> bool """Determines whether an exception's message contains a common message for transient errors. The exception's message containing one of these substrings is sufficient to determine that it is transient, but there can be transient exceptions whose messages do not contain these substrings. """ return ('The job encountered an internal error during execution' in str(exc) or 'Retrying the job may solve the problem' in str(exc)) # Retry object for errors encountered in making API calls (executing jobs, etc.) DEFAULT_RETRY_FOR_API_CALLS = Retry( # The predicate takes an exception and returns whether it is transient. predicate=lambda exc: (bq_retry.DEFAULT_RETRY._predicate(exc) or _transient_string_in_exception_message(exc)), deadline=DEFAULT_TIMEOUT_SEC) # Retry object for errors encountered while polling jobs in progress. # See https://github.com/googleapis/google-cloud-python/issues/6301 DEFAULT_RETRY_FOR_ASYNC_JOBS = Retry( # The predicate takes an exception and returns whether it is transient. predicate=lambda exc: (polling.DEFAULT_RETRY._predicate(exc) or _transient_string_in_exception_message(exc)), deadline=DEFAULT_TIMEOUT_SEC) class BigqueryBaseClient(object): """Stores credentials and pointers to a BigQuery project.
async def test_get_partitions_w_retry_timeout(self):
    """Run the get-partitions helper with an explicit retry and timeout."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    await self._get_partitions_helper(**call_options)
async def test_asynccollectionreference_list_documents_w_retry_timeout():
    """Run the list-documents helper with an explicit retry and timeout."""
    from google.api_core.retry import Retry

    call_options = {"retry": Retry(predicate=object()), "timeout": 123.0}
    await _list_documents_helper(**call_options)