def __init__(
    self,
    project_id,
    reauth=False,
    private_key=None,
    auth_local_webserver=False,
    dialect="legacy",
    location=None,
    try_credentials=None,
    credentials=None,
):
    """Set up a BigQuery connection, resolving credentials and project ID.

    Credential resolution order: the explicit ``credentials`` argument,
    then the module-level ``context`` cache, then a fresh lookup via
    ``pandas_gbq.auth.get_credentials``.  Whatever is resolved is written
    back into ``context`` if the cache was empty.

    Raises
    ------
    ValueError
        If no project ID was supplied and none could be resolved.
    """
    global context
    # Deferred imports: presumably keeps module import cheap / optional
    # dependencies lazy — TODO confirm original intent.
    from google.api_core.exceptions import GoogleAPIError
    from google.api_core.exceptions import ClientError
    from pandas_gbq import auth

    # Exception classes callers treat as HTTP-level errors.
    self.http_error = (ClientError, GoogleAPIError)
    self.project_id = project_id
    self.location = location
    self.reauth = reauth
    self.private_key = private_key
    self.auth_local_webserver = auth_local_webserver
    self.dialect = dialect
    self.credentials = credentials
    self.credentials_path = _get_credentials_file()
    default_project = None

    # Load credentials from cache.
    if not self.credentials:
        self.credentials = context.credentials
        default_project = context.project

    # Credentials were explicitly asked for, so don't use the cache.
    if private_key or reauth or not self.credentials:
        self.credentials, default_project = auth.get_credentials(
            private_key=private_key,
            project_id=project_id,
            reauth=reauth,
            auth_local_webserver=auth_local_webserver,
            try_credentials=try_credentials,
        )

    # Fall back to the project discovered during credential resolution.
    if self.project_id is None:
        self.project_id = default_project

    if self.project_id is None:
        raise ValueError(
            "Could not determine project ID and one was not supplied.")

    # Cache the credentials if they haven't been set yet.
    if context.credentials is None:
        context.credentials = self.credentials

    if context.project is None:
        context.project = self.project_id

    self.client = self.get_client()

    # BQ Queries costs $5 per TB. First 1 TB per month is free
    # see here for more: https://cloud.google.com/bigquery/pricing
    self.query_price_for_TB = 5.0 / 2**40  # USD/TB
def setup_bigquery_and_config():
    """Yield a factory that provisions a throwaway BigQuery dataset and a
    matching target-config JSON file, then tear everything down on close.

    The yielded callable returns ``(project_id, client, config_filename,
    dataset_id)`` for each dataset it creates.
    """
    project_id = os.environ.get("GOOGLE_PROJECT_ID")
    if not project_id:
        # No explicit project configured; derive one from the credentials.
        _, project_id = get_credentials()
    client = bigquery.Client(project=project_id)
    created_datasets = []
    written_configs = []

    def _create(
        validate_records=False,
        stream_data=False,
        replication_method="HYBRID",
    ):
        # Random suffix keeps parallel test runs from colliding.
        suffix = "".join(choice(ascii_uppercase) for _ in range(12))
        dataset_id = "target_bigquery_test_" + suffix
        config_filename = f"target-config-{dataset_id}.json"
        written_configs.append(config_filename)
        with open(config_filename, "w") as f:
            json.dump(
                {
                    "project_id": project_id,
                    "dataset_id": dataset_id,
                    "validate_records": validate_records,
                    "stream_data": stream_data,
                    "replication_method": replication_method,
                    "disable_collection": True,
                },
                f,
            )
        created_datasets.append(client.create_dataset(dataset_id))
        return project_id, client, config_filename, dataset_id

    yield _create

    # Teardown: remove every config file and dataset the factory produced.
    for config_filename in written_configs:
        os.remove(config_filename)
    for dataset in created_datasets:
        client.delete_dataset(dataset, delete_contents=True, not_found_ok=False)
def __init__(self, project_id, reauth=False, private_key=None,
             auth_local_webserver=False, dialect='legacy', location=None):
    """Set up a BigQuery connection and resolve the project ID.

    Credentials are always fetched via ``pandas_gbq.auth.get_credentials``;
    if no project ID was supplied, the one discovered alongside the
    credentials is used instead.

    Raises
    ------
    ValueError
        If no project ID was supplied and none could be resolved.
    """
    # Imported lazily, matching the original module's convention.
    from google.api_core.exceptions import GoogleAPIError
    from google.api_core.exceptions import ClientError
    from pandas_gbq import auth

    # Exception classes callers treat as HTTP-level errors.
    self.http_error = (ClientError, GoogleAPIError)

    self.project_id = project_id
    self.location = location
    self.reauth = reauth
    self.private_key = private_key
    self.auth_local_webserver = auth_local_webserver
    self.dialect = dialect
    self.credentials_path = _get_credentials_file()

    self.credentials, default_project = auth.get_credentials(
        private_key=private_key,
        project_id=project_id,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
    )

    if self.project_id is None:
        # Fall back to the project resolved with the credentials.
        if default_project is None:
            raise ValueError(
                'Could not determine project ID and one was not supplied.')
        self.project_id = default_project

    self.client = self.get_client()

    # BQ Queries costs $5 per TB. First 1 TB per month is free
    # see here for more: https://cloud.google.com/bigquery/pricing
    self.query_price_for_TB = 5.0 / 2 ** 40  # USD/TB
def main(buf=sys.stdin.buffer):
    """Entry point: read singer messages from ``buf``, persist them, emit the
    final state, and return it.

    Parameters
    ----------
    buf : binary stream, default ``sys.stdin.buffer``
        Raw byte stream of newline-delimited singer messages; wrapped as a
        UTF-8 text stream before being consumed.

    Returns
    -------
    The final state value produced by ``persist_lines``.
    """
    _, project_id = get_credentials()

    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Config file")
    args = parser.parse_args()

    # Start from the discovered project ID; a config file may override it.
    config = {"project_id": project_id}
    if args.config:
        # Fix: the handle was previously named ``input``, shadowing the builtin.
        with open(args.config) as config_file:
            config.update(json.load(config_file))

    if not config.get("disable_collection", False):
        logger.info(
            "Sending version information to singer.io. "
            + "To disable sending anonymous usage data, set "
            + 'the config parameter "disable_collection" to true'
        )
        # Fire-and-forget; the thread is intentionally not joined.
        threading.Thread(target=send_usage_stats).start()

    # Likewise renamed from ``input`` to avoid shadowing the builtin.
    text_stream = io.TextIOWrapper(buf, encoding="utf-8")
    state = persist_lines(config, text_stream)
    emit_state(state)
    logger.debug("Exiting normally")
    return state
def __init__(
    self,
    project_id,
    reauth=False,
    private_key=None,
    auth_local_webserver=False,
    dialect="standard",
    location=None,
    credentials=None,
    use_bqstorage_api=False,
):
    """Set up a BigQuery connection, resolving credentials and project ID.

    Credential resolution order: the explicit ``credentials`` argument,
    then the module-level ``context`` cache, then a fresh lookup via
    ``pandas_gbq.auth.get_credentials``.  The resolved values are written
    back into ``context`` if the cache was empty.  Optionally also builds a
    BigQuery Storage API client via ``_make_bqstorage_client``.

    Raises
    ------
    ValueError
        If no project ID was supplied and none could be resolved.
    """
    global context
    # Deferred imports: presumably keeps module import cheap / optional
    # dependencies lazy — TODO confirm original intent.
    from google.api_core.exceptions import GoogleAPIError
    from google.api_core.exceptions import ClientError
    from pandas_gbq import auth

    # Exception classes callers treat as HTTP-level errors.
    self.http_error = (ClientError, GoogleAPIError)
    self.project_id = project_id
    self.location = location
    self.reauth = reauth
    self.private_key = private_key
    self.auth_local_webserver = auth_local_webserver
    self.dialect = dialect
    self.credentials = credentials
    default_project = None

    # Service account credentials have a project associated with them.
    # Prefer that project if none was supplied.
    if self.project_id is None and hasattr(self.credentials, "project_id"):
        self.project_id = credentials.project_id

    # Load credentials from cache.
    if not self.credentials:
        self.credentials = context.credentials
        default_project = context.project

    # Credentials were explicitly asked for, so don't use the cache.
    if private_key or reauth or not self.credentials:
        self.credentials, default_project = auth.get_credentials(
            private_key=private_key,
            project_id=project_id,
            reauth=reauth,
            auth_local_webserver=auth_local_webserver,
        )

    # Fall back to the project discovered during credential resolution.
    if self.project_id is None:
        self.project_id = default_project

    if self.project_id is None:
        raise ValueError(
            "Could not determine project ID and one was not supplied."
        )

    # Cache the credentials if they haven't been set yet.
    if context.credentials is None:
        context.credentials = self.credentials

    if context.project is None:
        context.project = self.project_id

    self.client = self.get_client()
    # Storage API client is only built when requested (may be None otherwise
    # — depends on _make_bqstorage_client, not visible here).
    self.bqstorage_client = _make_bqstorage_client(
        use_bqstorage_api, self.credentials
    )

    # BQ Queries costs $5 per TB. First 1 TB per month is free
    # see here for more: https://cloud.google.com/bigquery/pricing
    self.query_price_for_TB = 5.0 / 2 ** 40  # USD/TB