def setup_logging_client(log_name: str):
    """
    Connect to gcloud and post all logging commands there as well as locally.
    The log name can then be set to the name of the service as a filter:
    https://console.cloud.google.com/logs?project=development-278003

    :param log_name: The custom name of the log file that will be used in gcloud
    :return:
    """
    if os.path.exists("config/GCLOUD_LOGGING_SERVICE_KEY.json"):
        # The GCLOUD_LOGGING_SERVICE_KEY exists in Circle CI and is passed through to the service.
        # There is one for each environment, e.g. development:
        # console.cloud.google.com/iam-admin/serviceaccounts/details/104042617795891603364?project=development-278003
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "config/GCLOUD_LOGGING_SERVICE_KEY.json"

        # Instantiate a client and handler for logging with gcloud
        client = google.cloud.logging.Client()
        handler = CloudLoggingHandler(name=log_name, client=client)
        logging.getLogger().setLevel(logging.INFO)  # defaults to WARN
        setup_logging(handler)
        logging.debug("Logging connected to GCloud")
    else:
        print("No GCLOUD_LOGGING_SERVICE_KEY detected, using native logging.")
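For reference, a minimal, self-contained sketch of the same pattern, assuming google-cloud-logging is installed, application default credentials are available, and "my-service" stands in for a real log name:

import logging

import google.cloud.logging
from google.cloud.logging.handlers import CloudLoggingHandler, setup_logging

client = google.cloud.logging.Client()
handler = CloudLoggingHandler(client, name="my-service")  # placeholder log name
setup_logging(handler)  # attach the handler to the root logger
logging.getLogger().setLevel(logging.INFO)

logging.info("visible locally and under the chosen log name in the gcloud console")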
def setup_logging(
    self, log_level=logging.INFO, excluded_loggers=EXCLUDED_LOGGER_DEFAULTS, **kw
):
    """Attach default Stackdriver logging handler to the root logger.

    This method uses the default log handler, obtained by
    :meth:`~get_default_handler`, and attaches it to the root Python
    logger, so that a call such as ``logging.warn``, as well as all child
    loggers, will report to Stackdriver logging.

    :type log_level: int
    :param log_level: (Optional) Python logging log level. Defaults to
                      :const:`logging.INFO`.

    :type excluded_loggers: tuple
    :param excluded_loggers: (Optional) The loggers to not attach the
                             handler to. This will always include the
                             loggers in the path of the logging client
                             itself.

    :type kw: dict
    :param kw: keyword args passed to handler constructor
    """
    handler = self.get_default_handler(**kw)
    setup_logging(handler, log_level=log_level, excluded_loggers=excluded_loggers)
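The same attachment can also be done through the client convenience method documented above; a minimal sketch assuming application default credentials:

import logging

import google.cloud.logging

client = google.cloud.logging.Client()
client.setup_logging(log_level=logging.DEBUG)  # attaches the default handler to the root logger

logging.warning("this call, and all child loggers, now report to Stackdriver")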
def setup():
    logger = logging.getLogger()

    # Set the region on log records.
    default_factory = logging.getLogRecordFactory()
    logging.setLogRecordFactory(partial(region_record_factory, default_factory))

    # Send logs directly via the logging client if possible. This ensures trace
    # ids are propagated and allows us to send structured messages.
    if environment.in_gae():
        client = Client()
        handler = StructuredAppEngineHandler(client)
        handlers.setup_logging(handler, log_level=logging.INFO)
        for handler in logger.handlers:
            if not isinstance(handler, StructuredAppEngineHandler):
                logger.removeHandler(handler)
    else:
        logging.basicConfig()

    for handler in logger.handlers:
        # If writing directly to Stackdriver, send a structured message.
        if isinstance(handler, StructuredAppEngineHandler):
            handler.setFormatter(StructuredLogFormatter())
        # Otherwise, the default stream handler requires a string.
        else:
            handler.setFormatter(
                logging.Formatter("(%(region)s) %(module)s/%(funcName)s : %(message)s"))
def _call_fut(self, handler, excludes=None):
    from google.cloud.logging.handlers import setup_logging

    if excludes:
        return setup_logging(handler, excluded_loggers=excludes)
    else:
        return setup_logging(handler)
def create_app(config, debug=False, testing=False, config_overrides=None):
    app = Flask(__name__)
    app.config.from_object(config)

    app.debug = debug
    app.testing = testing

    if config_overrides:
        app.config.update(config_overrides)

    # [START setup_logging]
    if not app.testing:
        client = google.cloud.logging.Client(app.config['PROJECT_ID'])
        handler = CloudLoggingHandler(client)
        # Attaches the handler to the root logger
        setup_logging(handler)
        logging.getLogger().setLevel(logging.INFO)
    # [END setup_logging]

    # Setup the data model.
    with app.app_context():
        model = get_model()
        model.init_app(app)

    # Initialize the OAuth2 helper.
    oauth2.init_app(
        app,
        scopes=['email', 'profile'],
        authorize_callback=_request_user_info)

    # Add a logout handler.
    @app.route('/logout')
    def logout():
        # Delete the user's profile and the credentials stored by oauth2.
        del session['profile']
        session.modified = True
        oauth2.storage.delete()
        return redirect(request.referrer or '/')

    # Register the Bookshelf CRUD blueprint.
    from .crud import crud
    app.register_blueprint(crud, url_prefix='/books')

    # Add a default root route.
    @app.route("/")
    def index():
        return redirect(url_for('crud.list'))

    # Add an error handler. This is useful for debugging the live application,
    # however, you should disable the output of the exception for production
    # applications.
    @app.errorhandler(500)
    def server_error(e):
        return """
        An internal error occurred: <pre>{}</pre>
        See logs for full stacktrace.
        """.format(e), 500

    return app
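A possible entry point for the factory above; the config module and run arguments are illustrative, not part of the sample:

# Hypothetical runner script; `config` is whatever module defines PROJECT_ID etc.
import config
from bookshelf import create_app  # package name assumed from the sample layout

app = create_app(config, debug=True)

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8080)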
def setup_logger() -> logging.Logger:
    """Set up the main application logger."""
    # Set up the uncaught exception handler
    default_exception_handler()

    if app_config['app_log_factory'] == 'gcp' and os.getenv('GOOGLE_APPLICATION_CREDENTIALS') is None:
        # Prevent setting up GCP logging without the credentials ENV variable
        app_config['app_log_factory'] = 'file'
        logging.error('GOOGLE_APPLICATION_CREDENTIALS ENV variable is missing, logging set to file output')

    if app_config['app_log_factory'] == 'gcp':
        # Disable all cherrypy console logging
        cherrypy.log.screen = False
        logging.getLogger("cherrypy").propagate = False

        # Connect GCP logging to the default Python logger
        logger = logging.getLogger()

        # Remove original log handlers
        for handler in logger.handlers:
            logger.removeHandler(handler)

        # Set up the Google Cloud Logging client
        client = google.cloud.logging.Client()

        # Set up the CloudLoggingHandler (a logging.StreamHandler subclass) explicitly
        # with the custom GCP formatter
        handler = CloudLoggingHandler(client, labels={
            'app_name': app_config['app_name'],
            'app_version': app_config['app_version'],
            'app_environment': app_config['app_env']
        })
        handler.setFormatter(CustomGCPFormatter())

        # Attach the custom handler to the root Python logger
        setup_logging(handler)
    elif app_config['app_log_factory'] == 'file':
        # Load log configuration
        logging.config.dictConfig(LOG_CONFIG)

        # Custom app logger
        logger = logging.getLogger('app')
    else:
        # Load log configuration
        logging.config.dictConfig(LOG_CONFIG)

        # Root logger
        logger = logging.getLogger()

    return logger
def configure(global_config, **settings):
    gcp_project = settings.get('idris.google_cloud_project')
    gcp_auth = settings.get('idris.google_application_credentials')
    if gcp_project and gcp_auth:
        os.environ['GOOGLE_CLOUD_PROJECT'] = gcp_project
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.abspath(gcp_auth)
        if settings.get('idris.use_google_cloud_logging') == 'true':
            if 'GAE_INSTANCE' in os.environ:
                client = google.cloud.logging.Client()
            else:
                # from_service_account_json is a classmethod, so call it on the class
                client = google.cloud.logging.Client.from_service_account_json(
                    settings['idris.google_application_credentials'])
            handler = CloudLoggingHandler(client)
            logging.getLogger().setLevel(logging.INFO)
            setup_logging(handler)

    config = Configurator(settings=settings, root_factory=root_factory)
    config.add_tween('idris.token_tween_factory')
    config.include('cornice')
    config.include('cornice_swagger')
    config.include('pyramid_chameleon')
    config.include('pyramid_jwt')
    config.include('idris.storage')
    config.include('idris.blob')
    config.include('idris.services.lookup')
    config.include('idris.services.cache')
    config.include('idris.services.download_counter')
    config.include('idris.services.auditlog')
    config.include('idris.apps.base')
    config.include('idris.apps.course')

    config.set_authorization_policy(ACLAuthorizationPolicy())
    config.set_jwt_authentication_policy(settings['idris.secret'],
                                         http_header='Authorization',
                                         auth_type='Bearer',
                                         expiration=3600,
                                         callback=add_role_principals)
    config.scan("idris.views")
    config.add_route('liveness_check', '/_live')
    config.add_route('readiness_check', '/_ready')
    config.add_route('api_without_slash', '/api')
    config.add_view(lambda _, __: HTTPFound('/api/'),
                    route_name='api_without_slash')
    config.add_static_view('api', path='idris:static/dist/swagger')
    config.add_static_view('static', path='idris:static/dist/web')
    config.add_route('edit_without_slash', '/edit')
    config.add_view(lambda _, __: HTTPFound('/edit/'),
                    route_name='edit_without_slash')
    return config
def init_gcloud_log(project_id, logger_name):
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    if IS_DEV_ENV or IS_TEST_ENV:
        logging.basicConfig(filename=logger_name + u'.log', level=logging.DEBUG, format=log_format)
    else:
        # see: https://googlecloudplatform.github.io/google-cloud-python/latest/logging-usage.html#cloud-logging-handler
        # and https://github.com/GoogleCloudPlatform/getting-started-python/blob/master/6-pubsub/bookshelf/__init__.py#L40
        client = google.cloud.logging.Client(project_id)
        # NB: we should use AppEngineHandler for the server at the next google.cloud API update:
        # https://googlecloudplatform.github.io/google-cloud-python/latest/logging-handlers-app-engine.html
        handler = CloudLoggingHandler(client, logger_name)
        handler.setFormatter(logging.Formatter(log_format))
        setup_logging(handler)
        logging.getLogger().setLevel(logging.INFO)
        logging.getLogger("readability.readability").setLevel(logging.WARNING)  # very verbose package
def setup_gce_logging(gunicorn_access_logger, gunicorn_error_logger):  # pragma: no cover
    if application.config['SQLALCHEMY_DATABASE_URI'][:22] in [
            'postgresql://localhost', 'db://localhost/test_db']:
        return

    import google.cloud.logging
    from google.cloud.logging.handlers import CloudLoggingHandler, setup_logging

    client = google.cloud.logging.Client()
    handler = CloudLoggingHandler(client, name=get_env())
    setup_logging(handler)
    gunicorn_access_logger.addHandler(handler)
    gunicorn_error_logger.addHandler(handler)
def setup() -> None:
    """Setup logging"""
    # Set the region on log records.
    logging.setLogRecordFactory(ContextualLogRecord)
    logger = logging.getLogger()

    # Send logs directly via the logging client if possible. This ensures trace
    # ids are propagated and allows us to send structured messages.
    if environment.in_gcp():
        client = Client()
        structured_handler = StructuredAppEngineHandler(client)
        handlers.setup_logging(structured_handler, log_level=logging.INFO)

        before_request_handler = StructuredAppEngineHandler(client, name=BEFORE_REQUEST_LOG)
        logging.getLogger(BEFORE_REQUEST_LOG).addHandler(before_request_handler)

        # Streams unstructured logs to stdout - these logs will still show up
        # under the appengine.googleapis.com/stdout Stackdriver logs bucket,
        # even if other logs are stalled on the global interpreter lock or some
        # other issue.
        stdout_handler = logging.StreamHandler(sys.stdout)
        handlers.setup_logging(stdout_handler, log_level=logging.INFO)

        for handler in logger.handlers:
            if not isinstance(handler, (StructuredAppEngineHandler, logging.StreamHandler)):
                logger.removeHandler(handler)
    else:
        logging.basicConfig()

    for handler in logger.handlers:
        # If we aren't writing directly to Stackdriver, prefix the log with important
        # context that would be in the labels.
        if not isinstance(handler, StructuredAppEngineHandler):
            handler.setFormatter(
                logging.Formatter(
                    "[pid: %(process)d] (%(region)s) %(module)s/%(funcName)s : %(message)s"
                ))

    # Export gunicorn errors using the same handlers as other logs, so that they
    # go to Stackdriver in production.
    gunicorn_logger = logging.getLogger("gunicorn.error")
    gunicorn_logger.handlers = logger.handlers
def setup(): """Setup logging""" # Set the region on log records. default_factory = logging.getLogRecordFactory() logging.setLogRecordFactory(partial(region_record_factory, default_factory)) logger = logging.getLogger() # Send logs directly via the logging client if possible. This ensures trace # ids are propogated and allows us to send structured messages. if environment.in_gcp(): client = Client() handler = StructuredAppEngineHandler(client) handlers.setup_logging(handler, log_level=logging.INFO) # Streams unstructured logs to stdout - these logs will still show up # under the appengine.googleapis.com/stdout Stackdriver logs bucket, # even if other logs are stalled on the global interpreter lock or some # other issue. stdout_handler = logging.StreamHandler(sys.stdout) handlers.setup_logging(stdout_handler, log_level=logging.INFO) for handler in logger.handlers: if not isinstance( handler, (StructuredAppEngineHandler, logging.StreamHandler)): logger.removeHandler(handler) else: logging.basicConfig() for handler in logger.handlers: # If writing directly to Stackdriver, send a structured message. if isinstance(handler, StructuredAppEngineHandler): handler.setFormatter(StructuredLogFormatter()) # Otherwise, the default stream handler requires a string. else: handler.setFormatter( logging.Formatter( "(%(region)s) %(module)s/%(funcName)s : %(message)s")) # Export gunicorn errors using the same handlers as other logs, so that they # go to Stackdriver in production. gunicorn_logger = logging.getLogger("gunicorn.error") gunicorn_logger.handlers = logger.handlers
def getLogger(name, level=__loglevel):
    formatter = logging.Formatter(
        '%(nasip)s %(nasid)s %(siteid)s %(msgtype)s %(message)s',
        '%a, %d %b %Y %H:%M:%S',
    )
    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addFilter(log_ctx)
    if GRAYLOG_SERVER:
        client = google.cloud.logging.Client()
        cloud_handler = CloudLoggingHandler(client, name="radiusd")
        logger.addHandler(cloud_handler)
        setup_logging(cloud_handler)
        cloud_handler.addFilter(log_ctx)
        cloud_handler.setFormatter(formatter)
    else:
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)
    return logger
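The formatter above requires every record to carry nasip, nasid, siteid, and msgtype attributes, which log_ctx presumably injects. A minimal sketch of such a filter, with hypothetical defaults:

import logging

class RadiusContextFilter(logging.Filter):
    """Hypothetical stand-in for log_ctx: guarantees the custom fields
    exist so the formatter never raises an error on missing attributes."""

    def filter(self, record):
        for field in ("nasip", "nasid", "siteid", "msgtype"):
            if not hasattr(record, field):
                setattr(record, field, "-")  # placeholder default
        return True

log_ctx = RadiusContextFilter()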
def setup_stackdriver(log_level, name, log_format, excluded_loggers=None):
    try:
        from google.cloud.logging import Client
        from google.cloud.logging import handlers as google_logging_handlers
        from google.cloud.logging.handlers.handlers import \
            EXCLUDED_LOGGER_DEFAULTS, \
            CloudLoggingHandler
    except ImportError:
        raise ValueError("google-cloud-logging is not properly installed")

    if not excluded_loggers:
        excluded_loggers = EXCLUDED_LOGGER_DEFAULTS
    client = Client()
    # the docstring of CloudLoggingHandler points to `client` instead of `Client`
    # noinspection PyTypeChecker
    handler = CloudLoggingHandler(client, name)
    handler.setFormatter(logging.Formatter(log_format, None, "%"))
    google_logging_handlers.setup_logging(handler,
                                          log_level=log_level,
                                          excluded_loggers=excluded_loggers)
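A possible invocation of the helper above; the log name and format string are illustrative:

import logging

setup_stackdriver(
    log_level=logging.INFO,
    name="my-app",  # illustrative log name
    log_format="%(asctime)s %(levelname)s %(name)s %(message)s",
)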
def _initialize_stackdriver_logging(self):
    stackdriver_client = google.cloud.logging.Client()
    stackdriver_handler = CloudLoggingHandler(
        stackdriver_client, name=__name__, resource=self.log_resource, labels={})
    setup_logging(stackdriver_handler)
import logging

logger = logging.getLogger("gs-scroller.main")
if __name__ == '__main__':
    logging.getLogger("gs-scroller").setLevel(logging.INFO)
else:
    logging.getLogger("gs-scroller").setLevel(logging.INFO)

try:
    import google.cloud.logging
    logging_client = google.cloud.logging.Client()
    from google.cloud.logging.handlers import CloudLoggingHandler, setup_logging
    logging_handler = CloudLoggingHandler(logging_client)
except ImportError:
    logger.warning("Google's logging module could not be imported")
else:
    setup_logging(logging_handler)
    logger.info("Google's logging module was imported")
    logger.debug("Debug messages will be logged")

try:
    from google.appengine.api import wrap_wsgi_app
except ImportError:
    logger.warning("Google's appengine wrapper could not be imported")
    wrap_wsgi_app = None
else:
    logger.info("Google's appengine wrapper was imported")

from converters import (Base64Converter, DigitsConverter, DigitListConverter)
from cache import temporary_cache
import urlread
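Downstream, the guarded wrap_wsgi_app import above is presumably applied to the WSGI app; a sketch assuming a Flask app object:

from flask import Flask

app = Flask(__name__)
if wrap_wsgi_app is not None:
    # Enable App Engine bundled services when running on GAE.
    app.wsgi_app = wrap_wsgi_app(app.wsgi_app)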
def runflow(sample_size, runner, argv=None):
    from pipeline_package import pipe_module as pm

    parser = argparse.ArgumentParser()
    known_args, pipeline_args = parser.parse_known_args(argv)

    config_object = ConfigParser()
    config_object.read("config.ini")
    session_info = config_object["SESSION_INFO"]
    bigquery_config = config_object["BIGQUERY_CONFIG"]
    db_name = session_info["session_name"]
    bq_project = bigquery_config["project_name"]
    bq_account = bigquery_config["account_name"]
    bq_auth = bigquery_config["google_credential_path"]
    model_bucket = bigquery_config["model_bucket"]
    model_path = bigquery_config["model_path"]
    model_dest = bigquery_config["model_dest"]

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = bq_auth

    session_id = bq_account + '_' + db_name
    client = google.cloud.logging.Client()
    handler = CloudLoggingHandler(client, name=session_id)
    logging.getLogger().setLevel(logging.INFO)  # defaults to WARN
    setup_logging(handler)

    dataset_name = bq_account + '_' + db_name
    base_table_id = bq_project + '.' + dataset_name + '.' + 'base_tweets'
    pred_table_id = bq_project + '.' + dataset_name + '.' + 'tweet_predictions'

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # Create the tweet_predictions table if it does not exist.
    try:
        client.get_table(pred_table_id)  # Make an API request.
        msg = "Table {} already exists.".format(pred_table_id)
        print(msg)
        logging.info(msg)
    except NotFound:
        msg = "Table {} is not found. Creating table...".format(pred_table_id)
        print(msg)
        logging.info(msg)
        schema = [
            bigquery.SchemaField("tweet_id", "INTEGER", mode="REQUIRED"),
            bigquery.SchemaField("prediction", "STRING", mode="REQUIRED"),
        ]
        table = bigquery.Table(pred_table_id, schema=schema)
        table = client.create_table(table)  # Make an API request.
        msg = f'Created table {pred_table_id}'
        logging.info(msg)

    source_query = pm.get_source_query(sample_size)
    sample_size_desc = pm.get_sample_size_desc(sample_size)

    # options = PipelineOptions(
    #     flags=[],
    #     project=bq_project,
    #     job_name='sentiment-prediction-job-pt',
    #     temp_location='gs://sentiment-model-ja/temp',
    #     staging_location='gs://sentiment-model-ja/temp',
    #     region='us-central1',
    #     max_num_workers=2)

    with beam.Pipeline(runner, argv=pipeline_args) as pipeline:
        table_schema = beam_bq.TableSchema()

        # Fields that use standard types.
        id_schema = beam_bq.TableFieldSchema()
        id_schema.name = 'tweet_id'
        id_schema.type = 'integer'
        id_schema.mode = 'required'
        table_schema.fields.append(id_schema)

        predict_schema = beam_bq.TableFieldSchema()
        predict_schema.name = 'prediction'
        predict_schema.type = 'string'
        predict_schema.mode = 'required'
        table_schema.fields.append(predict_schema)

        (
            pipeline
            | 'Read from BigQuery {}'.format(sample_size_desc) >>
              beam.io.ReadFromBigQuery(query=source_query, use_standard_sql=True)
            | 'predict' >> beam.ParDo(pm.Predict_bert(
                project=bq_project,
                bucket_name=model_bucket,
                model_path=model_path,
                destination_file_name=model_dest))
            | "Write data to BQ" >> beam.io.WriteToBigQuery(
                table='tweet_predictions',
                dataset=dataset_name,
                schema=table_schema,
                project=bq_project,
                write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)
        )
        job = pipeline.run()
        if runner == 'DataflowRunner':
            job.wait_until_finish()
account_name = bigquery_config["account_name"]
bq_auth = bigquery_config["google_credential_path"]
session_id = account_name + '_' + db_name
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = bq_auth

if not os.path.exists(os.path.join(os.path.dirname(__file__), db_name)):
    os.makedirs(db_name)

logging.basicConfig(filename=os.path.join(db_name, 'tweets_capture.log'),
                    level=logging.INFO)

client = google.cloud.logging.Client()
handler = CloudLoggingHandler(client, name=session_id)
logging.getLogger().setLevel(logging.INFO)  # defaults to WARN
setup_logging(handler)

log_name = '_' + db_name + '_to_process.txt'
error_file_log_name = '_' + db_name + '_error_files.txt'
log_path = os.path.join(db_name, log_name)
error_file_log_path = os.path.join(db_name, error_file_log_name)

db_backlog = open(os.path.join(db_name, log_name), 'a+')
db_backlog.close()
error_file_log = open(os.path.join(db_name, error_file_log_name), 'a+')
error_file_log.close()


# inherit from StreamListener class
class SListener(StreamListener):
def do_logging_setup(log_level=logging.DEBUG):
    client = google.cloud.logging.Client()
    handler = CloudLoggingHandler(client)
    logging.getLogger().setLevel(log_level)
    setup_logging(handler)
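Usage is then a single call at process start; a sketch assuming default credentials:

do_logging_setup(log_level=logging.INFO)
logging.info("handler attached; records now flow to Cloud Logging")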
def main():
    os.chdir(os.path.dirname(__file__))

    # Read config.ini file
    config_object = ConfigParser()
    config_object.read("config.ini")
    session_info = config_object["SESSION_INFO"]
    postgres_config = config_object["POSTGRES_CONFIG"]
    bigquery_config = config_object["BIGQUERY_CONFIG"]

    db_type = session_info["database"]
    db_name = session_info["session_name"]
    psql_server = postgres_config["host"]
    psql_port = postgres_config["port"]
    psql_user = postgres_config["username"]
    psql_password = postgres_config["password"]
    bq_project = bigquery_config["project_name"]
    bq_account = bigquery_config["account_name"]
    bq_auth = bigquery_config["google_credential_path"]

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = bq_auth

    session_id = bq_account + '_' + db_name
    log_name = '_' + db_name + '_to_process.txt'
    error_file_log_name = '_' + db_name + '_error_files.txt'
    backlog_file = os.path.join(db_name, log_name)
    error_file_log = os.path.join(db_name, error_file_log_name)

    client = google.cloud.logging.Client()
    handler = CloudLoggingHandler(client, name=session_id)
    logging.getLogger().setLevel(logging.INFO)  # defaults to WARN
    setup_logging(handler)

    print(f'Begin:\nProcessing json files in {log_name}\n{db_type}:\n'
          f'Project: {bq_project}\nDataset: {bq_account}.{db_name}')

    # for handler in logging.root.handlers[:]:
    #     logging.root.removeHandler(handler)
    logging.basicConfig(filename=os.path.join(db_name, 'tweets_processing.log'),
                        level=logging.INFO)

    while True:
        while Path(backlog_file).stat().st_size == 0:
            wait_msg = f'WAITING {str(datetime.datetime.now())}: waiting for json data'
            print(wait_msg)
            logging.info(wait_msg)
            time.sleep(60)

        if Path(backlog_file).stat().st_size > 0:
            with open(backlog_file, 'r') as f:
                lines = f.readlines()
                file_raw = lines[0]
                file = file_raw.strip('\n')
            with open(backlog_file, 'w') as update:
                for line in lines:
                    if line != file_raw:
                        update.write(line)

            if os.path.isfile(file):
                check_conn = False
                while not check_conn:
                    try:
                        msg = f'Connecting to {db_type}...'
                        print(msg)
                        logging.info(msg)
                        if db_type == 'postgres':
                            url = make_url(
                                f'postgresql://{psql_user}:{psql_password}@{psql_server}:{psql_port}/{db_name}')
                            engine = sqlalchemy.create_engine(url)
                            with engine.connect() as con:
                                rs = con.execute('SELECT 1')
                                print(rs)
                            check_conn = True
                            msg = 'Postgres connection successful!'
                            logging.info(msg)
                            print(msg)
                            if not database_exists(engine.url):  # db does not exist
                                # create a new database
                                create_database(engine.url)
                                msg = f"{db_name} postgres database not found. Creating new database."
                                logging.info(msg)
                                print(msg)
                        elif db_type == 'bigquery':
                            project_name = bq_project
                            dataset_name = bq_account + '_' + db_name
                            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = bq_auth
                            client = bigquery.Client()
                            # set pandas_gbq project
                            pandas_gbq.context.project = project_name
                            # Set dataset_id to the ID of the dataset to determine existence.
                            dataset_id = project_name + '.' + dataset_name
                            try:
                                client.get_dataset(dataset_id)  # Make an API request.
                                msg = "BigQuery: Dataset {} already exists".format(dataset_id)
                                print(msg)
                                logging.info(msg)
                                check_conn = True
                            except NotFound:
                                msg = "BigQuery: Dataset {} is not found. Creating dataset.".format(dataset_id)
                                print(msg)
                                logging.info(msg)
                                dataset = bigquery.Dataset(dataset_id)
                                # TODO(developer): Specify the geographic location where the dataset should reside.
                                dataset.location = "US"
                                # Send the dataset to the API for creation, with an explicit timeout.
                                # Raises google.api_core.exceptions.Conflict if the Dataset already
                                # exists within the project.
                                dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
                                msg = "BigQuery: Created dataset {}.{}".format(client.project, dataset.dataset_id)
                                logging.info(msg)
                                print(msg)
                    except Exception as e:
                        # error_msg = f'ERROR {str(datetime.datetime.now())}: Connection to postgres could not be established'
                        # logging.info(error_msg)
                        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                        message = template.format(type(e).__name__, e.args)
                        print(message)
                        print(traceback.format_exc())
                        logging.info(message)
                        check_conn = False
                        with open(error_file_log, 'a+') as f:
                            f.write(file_raw)
                        sys.exit()

                try:
                    file_lst = glob.glob(file)
                    for file_idx, file_name in enumerate(file_lst):
                        if file_idx == 0:
                            option = 'append'
                        else:
                            option = 'append'
                        process_start_msg = f'PROCESSING: {str(datetime.datetime.now())}: processing {file}'
                        print(process_start_msg)
                        logging.info(process_start_msg)
                        try:
                            to_sql_g = json2df(file_name)
                            print('Conversion of json to df complete.')
                        except Exception as e:
                            template = ("An exception of type {0} occurred during json to df "
                                        "conversion or prediction. Arguments:\n{1!r}")
                            message = template.format(type(e).__name__, e.args)
                            print(message)
                            print(traceback.format_exc())
                            logging.info(message)
                        else:
                            for df_name, df in to_sql_g.items():
                                table_id = project_name + '.' + dataset_name + '.' + df_name
                                msg = f'{str(datetime.datetime.now())}: uploading {df_name} --> {db_type}:{table_id}'
                                print(msg)
                                logging.info(msg)
                                if db_type == 'postgres':
                                    df.to_sql(df_name, con=engine, if_exists=option)
                                elif db_type == 'bigquery':
                                    table_name = df_name
                                    # create new table and write df
                                    dest_table = dataset_name + '.' + table_name
                                    pandas_gbq.to_gbq(df, dest_table, if_exists='append')
                                msg = f'{str(datetime.datetime.now())}: uploading {df_name} --> {db_type} complete'
                                print(msg)
                                logging.info(msg)
                        if db_type == 'postgres':
                            engine.dispose()
                        cdate = str(datetime.datetime.now())
                        success_msg = 'SUCCESS {}: (DB {}) FILE {}'.format(cdate, db_type, file_name)
                        logging.info(success_msg)
                except Exception as e:
                    # cdate = str(datetime.datetime.now())
                    # error_msg = 'ERROR {}: FILE {}: {}'.format(cdate, file_name, e)
                    template = "An exception of type {0} occurred. Arguments:\n{1!r}"
                    message = template.format(type(e).__name__, e.args)
                    print(message)
                    print(traceback.format_exc())
                    logging.info(message)
                    with open(error_file_log, 'a+') as f:
                        f.write(file_raw)
            else:
                msg = f'{file} does not exist'
                print(msg)
                logging.info(msg)
                with open(error_file_log, 'a+') as f:
                    f.write(file_raw)
                sys.exit()