def generate_report(client: bigquery.client.Client,
                    flags: Mapping[str, Any]) -> None:
  """Generates the final BigQuery Table with channel-level attribution and ROAS.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  """
  client.query(env.get_template('generate_report.sql').render(flags)).result()
def generate_report(client: bigquery.client.Client,
                    params: Mapping[str, Any]) -> None:
  """Generates the final BigQuery Table with channel-level attribution and ROAS.

  Args:
    client: BigQuery client.
    params: Mapping of all template parameter names to values.
  """
  client.query(
      jinja_env.get_template('generate_report.sql').render(params)).result()
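# These snippets assume module-level imports and a Jinja2 environment that
# loads the SQL templates from disk (plus helpers such as _strip_sql and a
# local fractribution module providing Fractribution and UNMATCHED_CHANNEL,
# which are not shown here). A minimal sketch of that shared setup follows;
# the template directory, the Environment keyword arguments, and the aliasing
# of `env` / `_jinja_env` to one object are assumptions for illustration, not
# confirmed by the source.
import logging
import os
import time
from typing import Any, Dict, List, Mapping

from google.cloud import bigquery
import jinja2

_TEMPLATES_DIR = os.path.join(os.path.dirname(__file__), 'templates')  # assumed layout
jinja_env = jinja2.Environment(
    loader=jinja2.FileSystemLoader(_TEMPLATES_DIR),
    keep_trailing_newline=True,
    lstrip_blocks=True,
    trim_blocks=True)
env = jinja_env         # Some snippets refer to the environment as `env`...
_jinja_env = jinja_env  # ...and others as `_jinja_env`.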
def extract_fractribution_input_data(client: bigquery.client.Client,
                                     flags: Mapping[str, Any]) -> None:
  """Extracts the input data for fractribution into BigQuery.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  """
  extract_data_sql = _strip_sql(
      env.get_template('extract_data.sql').render(flags))
  # Issue the query, and call result() to wait for it to finish. No results
  # are returned, as all output is stored on BigQuery.
  client.query(extract_data_sql).result()
def _run_sql(client: bigquery.client.Client, template_sql: str,
             params: Dict[str, Any]) -> bigquery.table.RowIterator:
  """Runs a SQL query rendered from a Jinja template.

  Args:
    client: BigQuery client.
    template_sql: Name of the Jinja SQL template file to render and run.
    params: SQL query template parameters.

  Returns:
    RowIterator over the query results.
  """
  sql = params['jinja_env'].get_template(template_sql).render(params)
  if params['verbose']:
    # Log the rendered SQL for easier debugging and to show pipeline progress.
    logging.info(sql)
  query_job = client.query(sql)
  while not query_job.done():
    elapsed_seconds = time.time() - query_job.started.timestamp()
    logging.info('BigQuery job is [%s]. %.2f seconds elapsed...',
                 query_job.state, elapsed_seconds)
    # Sleep as a safeguard to avoid flooding the API with status requests.
    time.sleep(1)
  logging.info('BigQuery job is [%s].', query_job.state)
  return query_job.result()
def _run_sql(client: bigquery.client.Client, template_sql: str,
             params: Dict[str, Any]) -> bigquery.table.RowIterator:
  """Renders template_sql with params, runs it, and returns the results."""
  sql = _jinja_env.get_template(template_sql).render(params)
  if params['verbose']:
    # Including a print here for easier debugging and to show pipeline progress.
    print(sql)
  return client.query(sql).result()
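# A possible call site for _run_sql. The 'verbose' key is read by both variants
# above, and 'jinja_env' only by the variant that does not use the module-level
# environment; the table name below is an illustrative assumption:
def _example_run_sql(client: bigquery.client.Client) -> None:
  params = {
      'jinja_env': jinja_env,
      'verbose': True,
      'path_summary_table': 'my_project.my_dataset.path_summary',  # assumed
  }
  for row in _run_sql(client, 'select_path_summary_query.sql', params):
    logging.info('%s', dict(row))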
def get_bigquery_bike_list(client: bigquery.client.Client) -> list:
    """Returns all rows from the Canyon outlet bike sale table."""
    get_bikes_query = (
        'SELECT * FROM `CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable` ')
    query_job = client.query(get_bikes_query)  # API request
    rows = query_job.result()  # Waits for query to finish
    # Materialize the RowIterator so the annotated return type (list) holds.
    return list(rows)
def query_from_file(client: bigquery.client.Client = None, query_path: str = ""):
    """Executes the query read from query_path on BigQuery via the client library."""
    print(f"{query_path} execution started.")
    with open(query_path, 'r', encoding='utf-8') as f:
        query_str = f.read()
    query_job = client.query(query_str)  # Make an API request.
    query_result = query_job.result()  # Waits for the query to finish.
    # Note: query_result is a RowIterator; printing it shows the object, not the rows.
    print(f"Query result is ... \n{query_result}")
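# A hedged usage sketch for query_from_file; the client construction is the
# standard google-cloud-bigquery call, but the SQL file path is an assumption:
if __name__ == '__main__':
    bq_client = bigquery.Client()
    query_from_file(client=bq_client, query_path='sql/create_report_table.sql')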
def run_fractribution(client: bigquery.client.Client,
                      flags: Mapping[str, Any]) -> None:
  """Runs fractribution on the extract_fractribution_input_data BigQuery tables.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.
  """
  # Step 1: Extract the paths from the path_summary_table.
  frac = fractribution.Fractribution(client.query(
      env.get_template('select_path_summary_query.sql').render(
          path_summary_table=flags['path_summary_table'])))
  # Step 2: Run Fractribution.
  frac.run_fractribution()
  frac.normalize_channel_to_attribution_names()
  # Step 3: Create the path_summary_table and upload the results.
  create_path_summary_table_sql = env.get_template(
      'create_path_summary_results_table.sql').render(flags)
  client.query(create_path_summary_table_sql).result()
  frac.upload_path_summary(client, flags['path_summary_table'])
def run_fractribution(client: bigquery.client.Client,
                      params: Mapping[str, Any]) -> None:
  """Runs fractribution on the extract_fractribution_input_data BigQuery tables.

  Args:
    client: BigQuery client.
    params: Mapping of all template parameter names to values.
  """
  # Step 1: Extract the paths from the path_summary_table.
  frac = fractribution.Fractribution(
      client.query(
          jinja_env.get_template('select_path_summary_query.sql').render(
              path_summary_table=params['path_summary_table'])))
  # Step 2: Run fractribution with the selected attribution model.
  frac.run_fractribution(params['attribution_model'])
  frac.normalize_channel_to_attribution_names()
  # Step 3: Create the path_summary_table and upload the results.
  create_path_summary_table_sql = jinja_env.get_template(
      'create_path_summary_results_table.sql').render(params)
  client.query(create_path_summary_table_sql).result()
  frac.upload_path_summary(client, params['path_summary_table'])
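# A possible driver for the fractribution steps above, assuming a params
# mapping carrying the template parameters; every value here, including the
# 'shapley' model name, is an illustrative assumption, not taken from the
# source, and the real templates may require additional parameters:
def _example_fractribution_pipeline(client: bigquery.client.Client) -> None:
  params = {
      'path_summary_table': 'my_project.my_dataset.path_summary',  # assumed
      'attribution_model': 'shapley',                              # assumed
  }
  run_fractribution(client, params)
  generate_report(client, params)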
def _extract_channels(client: bigquery.client.Client,
                      flags: Dict[str, Any]) -> List[str]:
  """Returns the channel names defined by the channel flags.

  Args:
    client: BigQuery client.
    flags: Dictionary of all flag names to flag values.

  Returns:
    List of channel names.
  """
  extract_channels_sql = env.get_template('extract_channels.sql').render(flags)
  return [row.channel for row in client.query(extract_channels_sql).result()]
def checkBikeIsntLoadedAlready(bikeData: list,
                               client: bigquery.client.Client) -> list:
    """Gets the UIDs already in the database and checks them against the newly
    scraped UIDs, returning only the new ones."""
    QUERY = (
        'SELECT UID FROM `CanyonOutletBikeSaleData.CanyonOutletBikeSaleDataTable` ')
    query_job = client.query(QUERY)  # API request
    rows = query_job.result()  # Waits for query to finish
    ExistingUID: list = []
    for row in rows:
        ExistingUID.append(row.UID)
    UniqueBikestoAdd: list = []
    for individualBike in bikeData:
        # Each scraped bike record stores its UID in the first position.
        if individualBike[0] not in ExistingUID:
            UniqueBikestoAdd.append(individualBike)
    return UniqueBikestoAdd
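# A hedged usage example for checkBikeIsntLoadedAlready; the scraped rows and
# their layout (UID in the first position) are illustrative assumptions:
def exampleFilterNewBikes(client: bigquery.client.Client) -> list:
    scrapedBikes = [
        ['bike-001', 'Canyon Endurace', 1899.0],  # assumed [UID, name, price]
        ['bike-002', 'Canyon Grizl', 2299.0],
    ]
    return checkBikeIsntLoadedAlready(scrapedBikes, client)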
def _extract_channels(client: bigquery.client.Client,
                      params: Mapping[str, Any]) -> List[str]:
  """Returns the list of channel names by running extract_channels.sql.

  Args:
    client: BigQuery client.
    params: Mapping of template parameter names to values.

  Returns:
    List of channel names.

  Raises:
    ValueError: User-formatted error if a channel is not a valid BigQuery
      column name.
  """
  extract_channels_sql = jinja_env.get_template(
      'extract_channels.sql').render(params)
  channels = [
      row.channel for row in client.query(extract_channels_sql).result()
  ]
  if fractribution.UNMATCHED_CHANNEL not in channels:
    channels.append(fractribution.UNMATCHED_CHANNEL)
  for channel in channels:
    if not _is_valid_column_name(channel):
      raise ValueError('Channel is not a legal BigQuery column name: %s' %
                       channel)
  return channels
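# _is_valid_column_name is referenced above but not shown. A minimal sketch,
# assuming it enforces BigQuery's column-naming rules (letters, digits and
# underscores only, not starting with a digit, limited length); the actual
# implementation in the source may differ:
import re

_VALID_COLUMN_NAME_PATTERN = re.compile(r'^[A-Za-z_][A-Za-z0-9_]{0,299}$')


def _is_valid_column_name(column_name: str) -> bool:
  """Returns True if column_name is a legal BigQuery column name."""
  return bool(_VALID_COLUMN_NAME_PATTERN.match(column_name))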