def run(
    self,
    project: str = None,
    credentials: dict = None,
    credentials_secret: str = None,
    dataset: str = None,
    table: str = None,
    schema: List[bigquery.SchemaField] = None,
):
    """
    Run method for this Task.  Invoked by _calling_ this Task within a Flow
    context, after initialization.

    Ensures the dataset exists (creating it when the lookup raises `NotFound`),
    then creates the table unless a table with that name is already present.

    Args:
        - project (str, optional): the project to initialize the BigQuery Client with;
            if not provided, will default to the one inferred from your credentials
        - credentials (dict, optional): a JSON document containing Google Cloud
            credentials.  You should provide these at runtime with an upstream
            Secret task.
        - credentials_secret (str, optional, DEPRECATED): the name of the Prefect
            Secret containing a JSON representation of your Google Application
            credentials
        - dataset (str, optional): the name of a dataset in which the table will
            be created
        - table (str, optional): the name of a table to create
        - schema (List[bigquery.SchemaField], optional): the schema to use when
            creating the table

    Returns:
        - None

    Raises:
        - SUCCESS: a `SUCCESS` signal if the table already exists
    """
    client = get_client(
        project=project,
        credentials=credentials,
        credentials_secret=credentials_secret,
    )

    # Resolve the dataset; a NotFound from the lookup means it must be created.
    try:
        dataset_ref = client.get_dataset(dataset)
    except NotFound:
        self.logger.debug("Dataset {} not found, creating...".format(dataset))
        dataset_ref = client.create_dataset(dataset)

    table_ref = dataset_ref.table(table)

    try:
        client.get_table(table_ref)
    except NotFound:
        self.logger.debug("Table {} not found, creating...".format(table))
        new_table = bigquery.Table(table_ref, schema=schema)

        # Optional settings are read from the task instance and applied only
        # when they are truthy.
        partitioning = self.time_partitioning
        if partitioning:
            new_table.time_partitioning = partitioning

        # Clustering fields control data sorting/access.
        clustering = self.clustering_fields
        if clustering:
            new_table.clustering_fields = clustering

        client.create_table(new_table)
    else:
        # The lookup succeeded, so there is nothing to create.
        raise SUCCESS(
            "{dataset}.{table} already exists.".format(dataset=dataset, table=table)
        )
def run(
    self,
    project: str = None,
    credentials_secret: str = None,
    dataset: str = None,
    table: str = None,
    schema: List[bigquery.SchemaField] = None,
):
    """
    Run method for this Task.  Invoked by _calling_ this Task within a Flow
    context, after initialization.

    Builds a BigQuery client from service-account credentials stored in a
    Secret, ensures the dataset exists (creating it when the lookup raises
    `NotFound`), then creates the table unless it is already present.

    Args:
        - project (str, optional): the project to initialize the BigQuery Client with;
            if not provided, will default to the `project_id` of the loaded
            credentials
        - credentials_secret (str, optional): the name of the Prefect Secret
            containing a JSON representation of your Google Application credentials.
            NOTE(review): earlier documentation gave
            `"GOOGLE_APPLICATION_CREDENTIALS"` as the default, but this signature
            defaults to `None`; presumably the fallback is supplied outside this
            method -- confirm
        - dataset (str, optional): the name of a dataset in which the table will
            be created
        - table (str, optional): the name of a table to create
        - schema (List[bigquery.SchemaField], optional): the schema to use when
            creating the table

    Returns:
        - None

    Raises:
        - SUCCESS: a `SUCCESS` signal if the table already exists
    """
    service_account_info = Secret(credentials_secret).get()
    credentials = Credentials.from_service_account_info(service_account_info)

    # Fall back to the credentials' own project when none was supplied.
    if not project:
        project = credentials.project_id
    client = bigquery.Client(project=project, credentials=credentials)

    # Resolve the dataset; a NotFound from the lookup means it must be created.
    try:
        dataset_ref = client.get_dataset(dataset)
    except NotFound:
        self.logger.debug("Dataset {} not found, creating...".format(dataset))
        dataset_ref = client.create_dataset(dataset)

    table_ref = dataset_ref.table(table)

    try:
        client.get_table(table_ref)
    except NotFound:
        self.logger.debug("Table {} not found, creating...".format(table))
        new_table = bigquery.Table(table_ref, schema=schema)

        # Optional settings are read from the task instance and applied only
        # when they are truthy.
        partitioning = self.time_partitioning
        if partitioning:
            new_table.time_partitioning = partitioning

        # Clustering fields control data sorting/access.
        clustering = self.clustering_fields
        if clustering:
            new_table.clustering_fields = clustering

        client.create_table(new_table)
    else:
        # The lookup succeeded, so there is nothing to create.
        raise SUCCESS(
            "{dataset}.{table} already exists.".format(dataset=dataset, table=table)
        )
def test_signals_dont_pass_invalid_arguments_to_states():
    """Raising SUCCESS with the `bad_result` keyword must produce a TypeError."""
    invalid_kwargs = {"bad_result": 100}
    with pytest.raises(TypeError):
        raise SUCCESS(**invalid_kwargs)
def succeed():
    """Unconditionally raise a `SUCCESS` signal constructed with no arguments."""
    signal = SUCCESS()
    raise signal