def historical(config, stid):
    """
    Run the 'historical' entry point of every configured service for one station.

    Service groups are read from config['Engine']['Services']. A group that is not listed in the module-level
    service_groups is skipped with a warning. Each service of the remaining groups is resolved with get_object
    and its historical(config, stid) is called.

    :param config: configuration mapping; 'Engine', 'debug' and 'traceback' entries are read
    :param stid: str: station ID
    :return:
    """
    global service_groups

    configured_services = config['Engine']['Services']
    for group_name in configured_services.keys():
        if group_name in service_groups:
            for service_name in configured_services[group_name]:
                try:
                    get_object(service_name).historical(config, stid)
                except AttributeError:
                    # Reported only at high debug levels; the next service is tried
                    if config['debug'] > 9:
                        print("thetae.engine warning: no 'historical' attribute for service %s" % service_name)
                except BaseException as error:
                    print('thetae.engine warning: failed to run historical for service %s' % service_name)
                    print("*** Reason: '%s'" % str(error))
                    # Failures are fatal only when tracebacks are requested
                    if config['traceback']:
                        raise
        else:
            # No group of this name has been defined, so there is nothing we know how to run
            print('thetae.engine warning: doing nothing for services in %s' % group_name)
def historical(config, stid):
    """
    Produce historical web output for a specific site.

    Iterates over the outputs listed in config['Web']['outputs'] and, for each one, calls the 'historical'
    function of the object resolved from 'thetae.web.<output>'. Outputs without a 'historical' attribute are
    skipped. The history start date is only reported here; each output receives just (config, stid).

    :param config: configuration mapping; 'Stations', 'Web', 'debug' and 'traceback' entries are read
    :param stid: str: station ID
    :return:
    """
    print('web.all: generating historical output for station %s' % stid)

    # Work out the start date for reporting; fall back to 30 days before today (UTC) if it is not configured
    time_now = datetime.utcnow()
    try:
        start_date = config_date_to_datetime(config['Stations'][stid]['history_start'])
    except Exception:
        print('web.all warning: cannot get history_start in config for station %s, setting to -30 days' % stid)
        start_date = datetime(time_now.year, time_now.month, time_now.day) - timedelta(days=30)
    if config['debug'] > 9:
        print('web.all: historical output starting %s' % start_date)

    # Get the output types from config
    try:
        output_types = list(config['Web']['outputs'])
    except KeyError:
        print("web.all warning: no output specified by key 'outputs' in config!")
        return

    for output_type in output_types:
        print("web.all: producing '%s' output" % output_type)
        if config['debug'] > 9:
            print("web.all: output '%s' for station %s" % (output_type, stid))
        try:
            get_object('thetae.web.%s' % output_type).historical(config, stid)
        except AttributeError:
            # This is how an output without a 'historical' function shows up
            if config['debug'] > 9:
                print("web.all: no historical '%s' output" % output_type)
        except Exception as e:
            # Exception (not BaseException) so that KeyboardInterrupt and SystemExit still stop the run
            print('web.all: failed to output %s for %s' % (output_type, stid))
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise
    return
def main(config):
    """
    Produce every web output listed in config for the next UTC day.

    Each output named in config['Web']['outputs'] is resolved as 'thetae.web.<output>' and its main() is called
    with (config, stid, forecast_date). Outputs are produced for all stations when the
    config['Web']['Options']['output_all_stations'] option evaluates true, otherwise only for
    config['current_stid'].

    :param config: configuration mapping
    :return:
    """
    # The forecast date is midnight UTC of tomorrow
    now_utc = datetime.utcnow()
    forecast_date = now_utc.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
    if config['debug'] > 9:
        print('web.all: forecast date %s' % forecast_date)

    # Without an 'outputs' key there is nothing to do
    try:
        output_types = list(config['Web']['outputs'])
    except KeyError:
        print("web.all warning: no output specified by key 'outputs' in config!")
        return

    # Any problem reading the all-stations option means it is treated as off
    try:
        all_stations = to_bool(config['Web']['Options']['output_all_stations'])
    except BaseException:
        all_stations = False
    stations = config['Stations'].keys() if all_stations else [config['current_stid']]

    for output_type in output_types:
        print("web.all: producing '%s' output" % output_type)
        module_name = 'thetae.web.%s' % output_type
        for stid in stations:
            if config['debug'] > 50:
                print("web.all: output '%s' for station %s" % (output_type, stid))
            try:
                get_object(module_name).main(config, stid, forecast_date)
            except BaseException as error:
                print('web.all: failed to output %s for %s' % (output_type, stid))
                print("*** Reason: '%s'" % str(error))
                # Only abort when tracebacks are requested; otherwise move on to the next station
                if config['traceback']:
                    raise
def get_latest_date(config, data_binding, stid, table_type='OBS'):
    """
    Retrieve the latest datetime in a table for a station.

    The table queried is '<STID>_<TABLE_TYPE>' (both upper-cased) in the database of the given data binding,
    ordered by the first column listed for that table type in the binding's schema.

    :param config: configuration mapping; config['DataBinding'][data_binding] is read
    :param data_binding: str: name of the data binding to use
    :param stid: str: station ID
    :param table_type: str: type of table (case-insensitive)
    :return: datetime: last available date, or None if the query or the date conversion fails
    """
    # Get the database and the names of columns in the schema
    database = config['DataBinding'][data_binding]['database']
    schema_name = config['DataBinding'][data_binding]['schema']
    schema = get_object(schema_name).schema
    # Upper-case the table type for the schema lookup too, matching how the table name is built below
    date_key = schema[table_type.upper()][0][0]
    table = '%s_%s' % (stid.upper(), table_type.upper())
    conn = connection(config, database)
    cursor = conn.cursor()
    try:
        # Column and table names cannot be bound as SQL parameters, hence the string formatting
        cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (date_key, table, date_key))
        last_dt = date_to_datetime(cursor.fetchone()[0])
    except Exception:
        # Missing table, empty table (fetchone() returns None) or an unparseable date
        last_dt = None
    finally:
        # Always release the connection, whether or not the query succeeded
        conn.close()
    return last_dt
def writeDaily(config, daily, data_binding, table_type):
    """
    Write one Daily object, or a list of Daily objects for a single station, to a database table.

    The table written is '<STID>_<TABLE_TYPE>' (upper-cased) in the database of data_binding. table_type must be
    'obs', 'verif', 'climo', 'hourly_forecast', or 'daily_forecast', or something defined in the schema of
    data_binding as %(stid)_%(table_type).upper(). Row values follow the column order of the schema.

    :param config: configuration mapping
    :param daily: Daily or list of Daily
    :param data_binding: str: name of database binding to write to
    :param table_type: str: type of table
    :return:
    :raises ValueError: if a list contains objects with differing station IDs
    """

    def daily_to_row(daily_obj, date_string, model_name, column_names):
        """ Build one sql row (a tuple) from a Daily object, in schema column order """
        values = []
        for name in column_names:
            key = name.upper()
            if key == 'DATETIME':
                values.append(date_string)
            elif key == 'MODEL':
                values.append(model_name)
            elif key != 'PRIMARY KEY':
                # Attributes the object does not have become NULL
                values.append(getattr(daily_obj, name, None))
        return tuple(values)

    # Look up where to write and which columns the schema defines for this table type
    binding = config['DataBinding'][data_binding]
    database = binding['database']
    schema_name = binding['schema']
    schema = get_object(schema_name).schema
    columns = [entry[0] for entry in schema[table_type.upper()]]
    verbose = config['debug'] > 50
    if verbose:
        print('db.writeDaily: converting hourly data to columns and values as follows')
        print(columns)

    # Treat a single object as a one-element list so both cases share one code path
    dailies = daily if type(daily) is list else [daily]
    stid = dailies[0].stid
    daily_sql = []
    for item in dailies:
        if stid != item.stid:
            raise ValueError('db.writeDaily error: all forecasts in list must have the same station id.')
        row = daily_to_row(item, date_to_string(item.date), item.model, columns)
        if verbose:
            print(row)
        daily_sql.append(row)

    # Write to the database
    table = '%s_%s' % (stid.upper(), table_type.upper())
    if config['debug'] > 9:
        print('db.writeDaily: writing data to table %s' % table)
    _write(config, daily_sql, database, table)
def main(config):
    """
    Fetch the forecast for the next UTC day from every model, for every station, and write it to the database.

    Stations form the outer loop and models the inner loop, which is more efficient for those APIs which have
    limited calls/minute. A model without a configured driver is skipped with a warning. Failures to fetch or
    write are reported and only re-raised when config['traceback'] is set.

    :param config: configuration mapping
    :return:
    """
    # The forecast date is midnight UTC of tomorrow
    now_utc = datetime.utcnow()
    forecast_date = now_utc.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
    print('getForecasts: forecast date %s' % forecast_date)

    for stid in config['Stations'].keys():
        print('getForecasts: getting forecasts for station %s' % stid)

        for model in config['Models'].keys():
            try:
                driver = config['Models'][model]['driver']
            except KeyError:
                print('getForecasts warning: driver not specified for model %s' % model)
                continue

            if config['debug'] > 0:
                print('getForecasts: getting forecast from %s' % model)
            try:
                # The driver's main() returns a Forecast, which is then labelled with the model name
                forecast = get_object(driver).main(config, model, stid, forecast_date)
                forecast.set_model(model)
            except BaseException as error:
                print('getForecasts: failed to get forecast from %s for %s' % (model, stid))
                print("*** Reason: '%s'" % str(error))
                if config['traceback']:
                    raise
            else:
                # Only a successfully retrieved forecast is written
                try:
                    if config['debug'] > 9:
                        print('getForecasts: writing forecast to database')
                    writeForecast(config, forecast)
                except BaseException as error:
                    print('getForecasts: failed to write forecast to database')
                    print("*** Reason: '%s'" % str(error))
                    if config['traceback']:
                        raise
def remove(config, stid):
    """
    Remove the database tables for a given station ID.

    For every data binding in config, the tables named '<STID>_<KEY>' for each key of the binding's schema are
    dropped if they exist in the database.

    :param config: configuration mapping; 'DataBinding' and 'debug' entries are read
    :param stid: str: station ID
    :return:
    :raises IOError: if the connection to a database cannot be made
    """
    for data_binding in config['DataBinding'].keys():
        # Open the database and schema
        schema_name = config['DataBinding'][data_binding]['schema']
        database = config['DataBinding'][data_binding]['database']
        schema = get_object(schema_name).schema
        conn = connection(config, database)
        if conn is None:
            raise IOError('Error: db.remove cannot connect to database %s' % database)
        try:
            cursor = conn.cursor()

            # Find the tables in the db and requested by the schema
            schema_table_names = ['%s_%s' % (stid.upper(), key) for key in schema.keys()]
            if config['debug'] > 50:
                print('db.remove: found the following tables in schema:')
                print(schema_table_names)
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            sql_table_names = [table[0] for table in cursor.fetchall()]
            if config['debug'] > 0:
                print('db.remove: found the following tables in sql db:')
                print(sql_table_names)

            # For each table, check if it exists, and if so, delete it
            for table in schema_table_names:
                if table in sql_table_names:
                    if config['debug'] > 0:
                        print('db.remove: deleting table %s' % table)
                    # Table names cannot be bound as SQL parameters, hence the string formatting
                    cursor.execute("DROP TABLE %s;" % table)
        finally:
            # Release the connection even if one of the statements above fails
            conn.close()
def _hours_since_latest(config, data_binding, stid, table_type, time_now):
    """ Whole hours between the newest date in a station's table and time_now, or None if it cannot be found """
    try:
        latest = get_latest_date(config, data_binding, stid, table_type)
        # Subtract in the (now - latest) direction and use total_seconds(): timedelta.seconds alone drops the
        # days component and, for a negative difference, wraps around to a value below 24 hours
        return int((time_now - latest).total_seconds() // 3600)
    except Exception:
        # No table, no rows (latest is None), or any other lookup problem: treat the age as unknown
        return None


def main(config):
    """
    Main function. Runs the obs and verification for the past 24 hours.

    Obs are retrieved for every station first, then verification for every station. config['current_stid'] is
    always updated. Any other station is skipped while the newest date in its table is younger than
    config['Verify']['obs_refresh_interval_hours'] hours (the same interval is used for both obs and
    verification); if that age cannot be determined the station is updated.

    :param config: configuration mapping
    :return:
    :raises KeyError: if no driver is configured for Obs or for Verification
    """
    data_binding = 'forecast'

    # Figure out which day we are verifying for: today.
    time_now = datetime.utcnow()
    verif_date = datetime(time_now.year, time_now.month, time_now.day)
    print('getVerification: verification date %s' % verif_date)

    # Obs
    # Find the obs driver
    try:
        obs_driver = config['Verify']['Obs']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Obs!')
        raise

    # Iterate over stations. For stations other than current stid, rapid updates are not necessary.
    for stid in config['Stations'].keys():
        # Check date
        if stid == config['current_stid']:
            last_obs_age = None
        else:
            last_obs_age = _hours_since_latest(config, data_binding, stid, 'OBS', time_now)
        if last_obs_age is not None and last_obs_age < int(config['Verify']['obs_refresh_interval_hours']):
            if config['debug'] > 0:
                print('getVerification: obs recent enough for station %s, omitting this time' % stid)
            continue

        # Get the obs
        if config['debug'] > 0:
            print('getVerification: getting obs for station %s' % stid)
        try:
            # Verification and obs main() only need to know the stid
            obs = get_object(obs_driver).main(config, stid)
        except Exception as e:
            # Exception (not BaseException) so that KeyboardInterrupt and SystemExit still stop the run
            print('getVerification: failed to get obs for %s' % stid)
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise
            continue
        # Write to the database
        try:
            if config['debug'] > 9:
                print('getVerification: writing obs to database')
            writeTimeSeries(config, obs, data_binding, 'obs')
        except Exception as e:
            print('getVerification: failed to write obs to database')
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise

    # Verification
    # Find the verification driver
    try:
        verif_driver = config['Verify']['Verification']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Verification!')
        raise

    # Iterate over stations. For stations other than current stid, rapid updates are not necessary.
    for stid in config['Stations'].keys():
        # Check date
        if stid == config['current_stid']:
            last_verif_age = None
        else:
            last_verif_age = _hours_since_latest(config, data_binding, stid, 'VERIF', time_now)
        if last_verif_age is not None and last_verif_age < int(config['Verify']['obs_refresh_interval_hours']):
            if config['debug'] > 0:
                print('getVerification: verification recent enough for station %s, omitting this time' % stid)
            continue

        # Get the verification
        if config['debug'] > 0:
            print('getVerification: getting verification for station %s' % stid)
        try:
            # Verification and obs main() only need to know the stid
            verification = get_object(verif_driver).main(config, stid)
        except Exception as e:
            print('getVerification: failed to get verification for %s' % stid)
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise
            continue
        # Write to the database
        try:
            if config['debug'] > 9:
                print('getVerification: writing verification to database')
            writeDaily(config, verification, data_binding, 'verif')
        except Exception as e:
            print('getVerification: failed to write verification to database')
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise
def historical(config, stid):
    """
    Retrieve historical obs, verification and climatology for a stid and write them to the database.

    Obs and verification are requested from their drivers' historical() starting at the station's configured
    'history_start' (30 days before today, UTC, if that cannot be read); the climo driver's historical() is
    called with (config, stid) only. Each data set is written only if it was retrieved successfully. Retrieval
    and write failures are reported and re-raised only when config['traceback'] is set.

    :param config: configuration mapping
    :param stid: str: station ID
    :return:
    :raises KeyError: if no driver is configured for Obs, Verification or Climo
    """
    data_binding = 'forecast'

    # Figure out which days we want since config history_start
    time_now = datetime.utcnow()
    try:
        start_date = config_date_to_datetime(config['Stations'][stid]['history_start'])
    except Exception:
        print('getVerification warning: cannot get history_start in config for '
              'station %s, setting to -30 days' % stid)
        start_date = datetime(time_now.year, time_now.month, time_now.day) - timedelta(days=30)
    print('getVerification: getting historical data for %s starting %s' % (stid, start_date))

    # Obs
    # Find the obs driver
    try:
        obs_driver = config['Verify']['Obs']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Obs!')
        raise

    # Get obs
    if config['debug'] > 9:
        print('getVerification: getting historical obs')
    try:
        # Verification and obs historical() need config, stid, start_date
        obs = get_object(obs_driver).historical(config, stid, start_date)
    except Exception as e:
        print('getVerification: failed to get historical obs for %s' % stid)
        print("*** Reason: '%s'" % str(e))
        if config['traceback']:
            raise
    else:
        # Write to the database, only when there is something to write
        try:
            if config['debug'] > 9:
                print('getVerification: writing historical obs to database')
            writeTimeSeries(config, obs, data_binding, 'obs')
        except Exception as e:
            print('getVerification: failed to write historical obs to database')
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise

    # Verification
    # Find the verification driver
    try:
        verif_driver = config['Verify']['Verification']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Verification!')
        raise

    # Get verification
    if config['debug'] > 9:
        print('getVerification: getting historical verification')
    try:
        # Verification and obs historical() need config, stid, start_date
        verification = get_object(verif_driver).historical(config, stid, start_date)
    except Exception as e:
        print('getVerification: failed to get historical verification for %s' % stid)
        print("*** Reason: '%s'" % str(e))
        if config['traceback']:
            raise
    else:
        # Write to the database, only when there is something to write
        try:
            if config['debug'] > 9:
                print('getVerification: writing historical verification to database')
            writeDaily(config, verification, data_binding, 'verif')
        except Exception as e:
            print('getVerification: failed to write historical verification to database')
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise

    # Climo
    # Find the climo driver
    try:
        climo_driver = config['Verify']['Climo']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Climo!')
        raise

    # Get climo
    if config['debug'] > 9:
        print('getVerification: getting historical climatology')
    try:
        # Climo historical() is called with config and stid only (no start_date)
        climo = get_object(climo_driver).historical(config, stid)
    except Exception as e:
        print('getVerification: failed to get climo for %s' % stid)
        print("*** Reason: '%s'" % str(e))
        if config['traceback']:
            raise
    else:
        # Write to the database, only when there is something to write
        try:
            if config['debug'] > 9:
                print('getVerification: writing climo to database')
            writeDaily(config, climo, data_binding, 'climo')
        except Exception as e:
            print('getVerification: failed to write climo to database')
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise

    return
def main(args):
    """
    Main engine process.

    Loads the config named by args.config, makes sure the site_data directory exists, applies the output
    options to the module-level service_groups, runs the database initialization, and then either performs a
    backfill (args.b_stid) or a database reset (args.r_stid) and exits, or runs historical retrieval for newly
    added sites followed by the main() of every configured service.

    :param args: parsed command-line arguments; config, no_output, output_only, no_climo, b_stid and r_stid
        are read
    :return:
    """
    global service_groups

    # Get the config file.
    config = get_config(args.config)
    if config['suppress_warnings']:
        warnings.filterwarnings('ignore')

    # Create the site_data archive directory, if necessary.
    site_directory = '%s/site_data' % config['THETAE_ROOT']
    if not os.path.isdir(site_directory):
        os.makedirs(site_directory)

    # Output suppression edits the group list in place; output-only replaces it (and wins if both are given)
    if args.no_output:
        service_groups.remove('output_services')
        print('thetae.engine: suppressing output')
    if args.output_only:
        service_groups = ['output_services']
        print('thetae.engine: doing output only')

    # Step 1: check the database initialization
    print('thetae.engine: running database initialization checks')
    new_sites = thetae.db.init(config, no_climo=args.no_climo)

    # Backfill of historical data: an empty station list means every configured station
    if args.b_stid is not None:
        print('thetae.engine: running backfill of historical data')
        if len(args.b_stid) == 0:
            print('thetae.engine: all sites selected')
            backfill_sites = config['Stations'].keys()
        else:
            backfill_sites = args.b_stid
        for stid in backfill_sites:
            historical(config, stid)
        sys.exit(0)

    # Database reset: stations must be named explicitly
    if args.r_stid is not None:
        print('thetae.engine: performing database reset')
        if len(args.r_stid) == 0:
            print('thetae.engine: error: no sites selected!')
            sys.exit(1)
        for stid in args.r_stid:
            thetae.db.remove(config, stid)
        sys.exit(0)

    # Step 2: run historical data for each site the initialization flagged
    for stid in new_sites:
        historical(config, stid)

    # Steps 3-6: run services!
    configured_services = config['Engine']['Services']
    for group_name in configured_services.keys():
        if group_name in service_groups:
            for service_name in configured_services[group_name]:
                try:
                    get_object(service_name).main(config)
                except BaseException as error:
                    print('thetae.engine warning: failed to run service %s' % service_name)
                    print("*** Reason: '%s'" % str(error))
                    if config['traceback']:
                        raise
        else:
            # No group of this name has been defined, so there is nothing we know how to run
            print('thetae.engine warning: doing nothing for services in %s' % group_name)
def init(config, reset_old=False, no_climo=False):
    """
    Make sure every station in config has its tables in every data binding's database.

    Missing tables are created from the binding's schema. A station is reported back when any of its tables
    was missing, or when the newest date in an existing table is unavailable or too old (more than 30 days;
    for the CLIMO table, older than Dec 31 of the year given by last_leap_year). Old tables are dropped and
    recreated only if reset_old is set.

    :param config: configuration mapping
    :param reset_old: if True, erases tables if they are too old
    :param no_climo: if True, does not check "CLIMO" tables
    :return: list of station IDs in config that require historical data to be retrieved
    :raises IOError: if the connection to a database cannot be made
    """

    def create_table(cursor, table, structure):
        """ Create a table whose columns are the (name, type) pairs of the schema structure """
        sqltypestr = ', '.join(["%s %s" % _type for _type in structure])
        cursor.execute("CREATE TABLE %s (%s);" % (table, sqltypestr,))

    add_sites = []
    for data_binding in config['DataBinding'].keys():
        # Open the database and schema
        binding = config['DataBinding'][data_binding]
        schema_name = binding['schema']
        database = binding['database']
        schema = get_object(schema_name).schema
        conn = connection(config, database)
        if conn is None:
            raise IOError('Error: db.init cannot connect to database %s' % database)
        cursor = conn.cursor()

        for stid in config['Stations'].keys():
            needs_history = False

            # Tables requested by the schema; the primary (datetime) key must be listed first in each
            schema_table_names = ['%s_%s' % (stid.upper(), key) for key in schema.keys()]
            schema_table_structures = list(schema.values())
            date_keys = [schema[key][0][0] for key in schema.keys()]
            if config['debug'] > 50:
                print('db.init: found the following tables in schema:')
                print(schema_table_names)

            # Tables that currently exist in the database
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            sql_table_names = [found[0] for found in cursor.fetchall()]
            if config['debug'] > 50:
                print('db.init: found the following tables in sql db:')
                print(sql_table_names)

            for table, structure, date_key in zip(schema_table_names, schema_table_structures, date_keys):
                if no_climo and 'CLIMO' in table.upper():
                    if config['debug'] > 9:
                        print('db.init: ignoring table %s' % table)
                    continue

                if table not in sql_table_names:
                    # Something was missing, so the site goes on the output list
                    needs_history = True
                    if config['debug'] > 0:
                        print('db.init: need to create table %s' % table)
                    create_table(cursor, table, structure)
                    continue

                # The table exists: check if its data are recent
                time_now = datetime.utcnow()
                if table == stid.upper() + '_CLIMO':
                    recent = time_now - datetime(last_leap_year(time_now), 12, 31)
                else:
                    recent = timedelta(days=30)
                try:
                    cursor.execute("SELECT %s FROM %s ORDER BY %s DESC LIMIT 1;" % (date_key, table, date_key))
                    last_dt = date_to_datetime(cursor.fetchone()[0])
                except BaseException:
                    last_dt = None
                if last_dt is not None and not (time_now - last_dt > recent):
                    continue

                # Old or missing data
                needs_history = True
                if reset_old:
                    if config['debug'] > 0:
                        print('db.init: %s table too old, resetting it' % table)
                    cursor.execute("DROP TABLE %s;" % table)
                    create_table(cursor, table, structure)
                elif config['debug'] > 0:
                    print('db.init: %s table is old, adding to historical' % table)

            # Lastly, add the site if we need to rerun historical data
            if needs_history and stid not in add_sites:
                add_sites.append(stid)
            elif config['debug'] > 0:
                print('db.init: nothing to do for station %s' % stid)

        conn.close()

    return add_sites
def writeTimeSeries(config, timeseries, data_binding, table_type):
    """
    Write one TimeSeries object, or a list of TimeSeries objects for a single station, to a database table.

    The table written is '<STID>_<TABLE_TYPE>' (upper-cased) in the database of data_binding. table_type must be
    'obs', 'verif', 'climo', 'hourly_forecast', or 'daily_forecast', or something defined in the schema of
    data_binding as %(stid)_%(table_type).upper(). The structure of the timeseries pandas data should match the
    schema specified in the data_binding. Note that the column names of each object's data are upper-cased in
    place.

    :param config: configuration mapping
    :param timeseries: TimeSeries or list of TimeSeries
    :param data_binding: str: name of database binding to write to
    :param table_type: str: type of table
    :return:
    :raises ValueError: if a list contains objects with differing station IDs
    """

    def hourly_to_row(hourly, model, cols):
        """ Convert an hourly timeseries table into a list of sql rows (tuples), in schema column order """
        if config['debug'] > 50:
            print('db.writeTimeSeries: converting timeseries data to SQL rows')
        hourly.columns = [name.upper() for name in hourly.columns]
        upper_cols = [name.upper() for name in cols]
        rows = []
        for pd_row in hourly.itertuples():
            # Fall back to the raw DATETIME value when the string conversion raises TypeError
            try:
                datestr = date_to_string(pd_row.DATETIME.to_pydatetime())
            except TypeError:
                datestr = pd_row.DATETIME
            values = []
            for col in upper_cols:
                if col == 'PRIMARY KEY':
                    continue
                if col == 'DATETIME':
                    values.append(datestr)
                elif col == 'MODEL':
                    values.append(model)
                else:
                    try:
                        values.append(float(getattr(pd_row, col)))
                    except AttributeError:
                        # Column absent from the data
                        values.append(None)
                    except (TypeError, ValueError):
                        # Not convertible to float: store the value unchanged
                        values.append(getattr(pd_row, col))
            rows.append(tuple(values))
        return rows

    # Look up where to write and which columns the schema defines for this table type
    binding = config['DataBinding'][data_binding]
    database = binding['database']
    schema_name = binding['schema']
    schema = get_object(schema_name).schema
    columns = [entry[0] for entry in schema[table_type.upper()]]
    if config['debug'] > 50:
        print('db.writeTimeSeries: converting hourly data to columns and values as follows')
        print(columns)

    # Treat a single object as a one-element list so both cases share one code path
    series_list = timeseries if type(timeseries) is list else [timeseries]
    stid = series_list[0].stid
    hourly_sql = []
    for ts in series_list:
        if stid != ts.stid:
            raise ValueError('db.writeTimeSeries error: all forecasts in list must have the same station id.')
        # Datetime must be derived from pandas dataframe of timeseries
        series = hourly_to_row(ts.data, ts.model, columns)
        if config['debug'] > 50:
            print(series)
        hourly_sql += series

    # Write to the database
    table = '%s_%s' % (stid.upper(), table_type.upper())
    if config['debug'] > 9:
        print('db.writeTimeSeries: writing data to table %s' % table)
    _write(config, hourly_sql, database, table)
def main(config):
    """
    Retrieve verification and then obs for every station and write both to the 'forecast' data binding.

    The drivers named in config['Verify']['Verification'] and config['Verify']['Obs'] are resolved with
    get_object and their main(config, stid) is called for each station. Retrieval and write failures are
    reported and only re-raised when config['traceback'] is set.

    :param config: configuration mapping
    :return:
    :raises KeyError: if no driver is configured for Verification or for Obs
    """
    data_binding = 'forecast'

    # The verification date is today, at midnight UTC
    now_utc = datetime.utcnow()
    verif_date = now_utc.replace(hour=0, minute=0, second=0, microsecond=0)
    print('getVerification: verification date %s' % verif_date)

    # ---- Verification ----
    try:
        verif_driver = config['Verify']['Verification']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Verification!')
        raise

    for stid in config['Stations'].keys():
        if config['debug'] > 9:
            print('getVerification: getting verification for station %s' % stid)
        try:
            # The driver's main() only needs to know the stid
            verification = get_object(verif_driver).main(config, stid)
        except BaseException as error:
            print('getVerification: failed to get verification for %s' % stid)
            print("*** Reason: '%s'" % str(error))
            if config['traceback']:
                raise
        else:
            # Only successfully retrieved data are written
            try:
                if config['debug'] > 9:
                    print('getVerification: writing verification to database')
                writeDaily(config, verification, data_binding, 'verif')
            except BaseException as error:
                print('getVerification: failed to write verification to database')
                print("*** Reason: '%s'" % str(error))
                if config['traceback']:
                    raise

    # ---- Obs ----
    try:
        obs_driver = config['Verify']['Obs']['driver']
    except KeyError:
        print('getVerification error: no driver specified for Obs!')
        raise

    for stid in config['Stations'].keys():
        if config['debug'] > 9:
            print('getVerification: getting obs for station %s' % stid)
        try:
            # The driver's main() only needs to know the stid
            obs = get_object(obs_driver).main(config, stid)
        except BaseException as error:
            print('getVerification: failed to get obs for %s' % stid)
            print("*** Reason: '%s'" % str(error))
            if config['traceback']:
                raise
        else:
            # Only successfully retrieved data are written
            try:
                if config['debug'] > 9:
                    print('getVerification: writing obs to database')
                writeTimeSeries(config, obs, data_binding, 'obs')
            except BaseException as error:
                print('getVerification: failed to write obs to database')
                print("*** Reason: '%s'" % str(error))
                if config['traceback']:
                    raise
def historical(config, stid):
    """
    Obtain historical forecast data for a specific site and write it to the database.

    Iterates over the models in config which have the 'historical' parameter set to True. Each such model's
    driver is asked, through its historical() function, for forecasts on every date from the station's
    configured 'history_start' (30 days before today, UTC, if that cannot be read) up to now, in 24-hour steps.

    :param config: configuration mapping
    :param stid: str: station ID
    :return:
    """
    print('getForecasts: getting historical forecasts for station %s' % stid)

    # Figure out which days we are forecasting for since config start_date.
    time_now = datetime.utcnow()
    try:
        start_date = config_date_to_datetime(config['Stations'][stid]['history_start'])
    except Exception:
        print('getForecasts warning: cannot get history_start in config for station %s, setting to -30 days'
              % stid)
        start_date = datetime(time_now.year, time_now.month, time_now.day) - timedelta(days=30)
    forecast_dates = []
    date = start_date
    while date < time_now:
        forecast_dates.append(date)
        date = date + timedelta(hours=24)
    if config['debug'] > 9:
        print('getForecasts: getting historical forecasts starting %s' % start_date)

    # Go through the models in config
    for model in config['Models'].keys():
        if not to_bool(config['Models'][model].get('historical', False)):
            if config['debug'] > 9:
                print('getForecasts: no historical to do for model %s' % model)
            continue
        try:
            driver = config['Models'][model]['driver']
        except KeyError:
            print('getForecasts warning: driver not specified for model %s' % model)
            continue

        # Get the forecasts from the driver
        try:
            # Each driver should have a function 'historical' which returns a list of Forecasts
            print('getForecasts: getting historical forecasts from %s' % model)
            forecasts = list(get_object(driver).historical(config, model, stid, forecast_dates))
            # Set the model name in place and keep the Forecast objects themselves, so the list stays valid
            # whether or not set_model() returns anything
            for forecast in forecasts:
                forecast.set_model(model)
        except Exception as e:
            # Exception (not BaseException) so that KeyboardInterrupt and SystemExit still stop the run
            print('getForecasts: failed to get historical forecasts from %s for %s' % (model, stid))
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise
            continue

        # Write to the database
        try:
            if config['debug'] > 9:
                print('getForecasts: writing historical forecasts to database')
            writeForecast(config, forecasts)
        except Exception as e:
            print('getForecasts: failed to write historical forecasts to database')
            print("*** Reason: '%s'" % str(e))
            if config['traceback']:
                raise

    return