def main():
    """Fetch historical ETCOC sample data for each sample site and write it
    to per-station CSV files plus one combined CSV.

    Command line:
      -c/--ConfigFile        INI configuration file.
      -y/--Years             Comma-separated list of dates (YYYY-MM-DD).
      -f/--OutfileDirectory  Directory to write the historical records.
    """
    parser = optparse.OptionParser()
    parser.add_option("-c", "--ConfigFile", dest="config_file",
                      help="INI Configuration file.")
    parser.add_option("-y", "--Years", dest="year_list",
                      help="List of years to retrieve the sample data for. Format is [YYYY-MM-DD]")
    parser.add_option("-f", "--OutfileDirectory", dest="out_dir",
                      help="Directory to write the historical records.")
    (options, args) = parser.parse_args()

    config_file = ConfigParser.RawConfigParser()
    config_file.read(options.config_file)

    # Fix: logger was only assigned inside the "if logConfFile:" branch, so the
    # later "if logger:" checks raised NameError when no logging config was set.
    logger = None
    logConfFile = config_file.get('logging', 'config_file')
    if logConfFile:
        logging.config.fileConfig(logConfFile)
        logger = logging.getLogger(__name__)
        logger.info("Log file opened.")

    try:
        schema_url = config_file.get("dhec_soap_service", "schema_url")
        base_url = config_file.get("dhec_soap_service", "base_url")
        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
    except ConfigParser.Error as e:
        if logger:
            logger.exception(e)
    else:
        # Load the sample site metadata (name, location, containing boundaries).
        mb_sites = mb_sample_sites(True)
        mb_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)
        dates = options.year_list.split(',')
        sample_data = get_historical_samples(dates, mb_sites, schema_url, base_url)

        complete_file = "%s.csv" % (os.path.join(options.out_dir, 'etcoc_all_stations'))
        if logger:
            logger.info("Creating file: %s to write all results" % (complete_file))
        with open(complete_file, "w") as complete_station_etcoc_file:
            for station in mb_sites:
                try:
                    file_name = "%s.csv" % (os.path.join(options.out_dir, station.name))
                    if logger:
                        logger.info("Creating file: %s to write results" % (file_name))
                    with open(file_name, "w") as station_etcoc_file:
                        station_data = sample_data[station.name]
                        for sample_rec in station_data['results']:
                            # Write each record to both the per-station file and
                            # the combined all-stations file.
                            station_etcoc_file.write('%s,%s,%s\n' % (station.name,
                                                                     sample_rec['date'],
                                                                     sample_rec['value']))
                            complete_station_etcoc_file.write('%s,%s,%s\n' % (station.name,
                                                                              sample_rec['date'],
                                                                              sample_rec['value']))
                # Fix: "(IOError, Exception)" was redundant — Exception already
                # subsumes IOError. Log and continue with the next station.
                except Exception as e:
                    if logger:
                        logger.exception(e)
# NOTE(review): incomplete fragment — this chunk starts inside the try/except of
# a function whose "def" line is not visible here (the leading "except" has no
# visible "try") and it ends mid-list ("header_row = [ ... ,").  Left
# byte-identical.  It uses Python 2-only "except ConfigParser.Error, e" syntax —
# confirm the target interpreter; also "model_csv_dir" implies options not
# declared in any parser visible in this view.
logger = logging.getLogger('mb_wq_predicition_logger') logger.info("Log file opened.") except ConfigParser.Error, e: traceback.print_exc(e) sys.exit(-1) try: boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file') sites_location_file = config_file.get('boundaries_settings', 'sample_sites') except ConfigParser.Error,e: if logger: logger.exception(e) else: #Load the sample site information. Has name, location and the boundaries that contain the site. mb_sites = mb_sample_sites() mb_sites.load_sites(file_name=sites_location_file, boundary_file=boundaries_location_file) #Build watershed groups. watersheds = {} for site in mb_sites: if site.contained_by[0] is not None: if site.contained_by[0].name.lower() not in watersheds: watersheds[site.contained_by[0].name.lower()] = [] watersheds[site.contained_by[0].name.lower()].append(site.name) if len(options.model_csv_dir): model_csv_list = glob.glob('%s/*.csv' % (options.model_csv_dir)) start_line = 1 header_row = [ "Location", "Site",
def run_wq_models(self, **kwargs):
    """Run the water-quality models for every sample site and output results.

    kwargs:
      config_file_name -- path to the INI configuration file.
      begin_date -- datetime the models are run for.
      use_logging -- flag passed through to the sampling-date lookup.
    """
    prediction_testrun_date = datetime.now()
    try:
        config_file = ConfigParser.RawConfigParser()
        config_file.read(kwargs['config_file_name'])
        data_collector_plugin_directories = config_file.get('data_collector_plugins',
                                                            'plugin_directories').split(',')
        self.collect_data(data_collector_plugin_directories=data_collector_plugin_directories)
        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
        xenia_wq_db_file = config_file.get('database', 'name')
        # Move xenia obs db settings into a standalone ini. We can then check
        # the main ini file into source control without exposing login info.
        db_settings_ini = config_file.get('password_protected_configs', 'settings_ini')
        xenia_obs_db_config_file = ConfigParser.RawConfigParser()
        xenia_obs_db_config_file.read(db_settings_ini)
        xenia_obs_db_host = xenia_obs_db_config_file.get('xenia_observation_database', 'host')
        xenia_obs_db_user = xenia_obs_db_config_file.get('xenia_observation_database', 'user')
        xenia_obs_db_password = xenia_obs_db_config_file.get('xenia_observation_database', 'password')
        xenia_obs_db_name = xenia_obs_db_config_file.get('xenia_observation_database', 'database')
        # Fix: output_settings_ini was referenced below (in the
        # check_site_date_for_sampling_date call) but never assigned in this
        # copy of the routine, causing a NameError.  Read it the same way the
        # other copy of this routine in this file does.
        output_settings_ini = config_file.get('password_protected_configs', 'settings_ini')
        output_plugin_dirs = config_file.get('output_plugins', 'plugin_directories').split(',')
    # "(ConfigParser.Error, Exception)" was redundant; Exception covers both.
    except Exception as e:
        self.logger.exception(e)
    else:
        # Load the sample site information. Has name, location and the
        # boundaries that contain the site.
        mb_sites = mb_sample_sites()
        mb_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)
        # Retrieve the data needed for the models.
        mb_wq_data = mb_wq_model_data(xenia_wq_db_name=xenia_wq_db_file,
                                      xenia_obs_db_type='postgres',
                                      xenia_obs_db_host=xenia_obs_db_host,
                                      xenia_obs_db_user=xenia_obs_db_user,
                                      xenia_obs_db_password=xenia_obs_db_password,
                                      xenia_obs_db_name=xenia_obs_db_name)
        site_model_ensemble = []
        # First pass we want to get all the data, after that we only need to
        # query the site specific pieces.
        reset_site_specific_data_only = False
        site_data = OrderedDict()
        total_time = 0
        for site in mb_sites:
            try:
                # Get all the models used for the particular sample site.
                model_list = self.build_test_objects(config_file=config_file,
                                                     site_name=site.name)
                if len(model_list):
                    # Create the container for all the models.
                    site_equations = wqEquations(site.name, model_list, True)
                    # Get the station specific tide stations.
                    tide_station = config_file.get(site.name, 'tide_station')
                else:
                    self.logger.error("No models found for site: %s" % (site.name))
            except Exception as e:
                self.logger.exception(e)
            else:
                try:
                    if len(model_list):
                        mb_wq_data.reset(site=site, tide_station=tide_station)
                        site_data['station_name'] = site.name
                        mb_wq_data.query_data(kwargs['begin_date'],
                                              kwargs['begin_date'],
                                              site_data,
                                              reset_site_specific_data_only)
                        reset_site_specific_data_only = True
                        site_equations.runTests(site_data)
                        total_test_time = sum(testObj.test_time for testObj in site_equations.tests)
                        self.logger.debug("Site: %s total time to execute models: %f ms"
                                          % (site.name, total_test_time * 1000))
                        total_time += total_test_time
                        # Calculate some statistics on the entero results. This
                        # is making an assumption that all the tests we are
                        # running are calculating the same value, the entero
                        # amount.
                        entero_stats = None
                        if len(site_equations.tests):
                            entero_stats = stats()
                            for test in site_equations.tests:
                                if test.mlrResult is not None:
                                    entero_stats.addValue(test.mlrResult)
                            entero_stats.doCalculations()
                        # Check to see if there is a entero sample for our date
                        # as long as the date is not the current date.
                        entero_value = None
                        if datetime.now().date() != kwargs['begin_date'].date():
                            entero_value = check_site_date_for_sampling_date(site.name,
                                                                             kwargs['begin_date'],
                                                                             output_settings_ini,
                                                                             kwargs['use_logging'])
                        # Fix: the looked-up entero_value was discarded — the
                        # dict stored a hard-coded None.
                        site_model_ensemble.append({'metadata': site,
                                                    'models': site_equations,
                                                    'entero_value': entero_value,
                                                    'statistics': entero_stats})
                # Fix: Python 2-only "except Exception,e:" replaced with the
                # "as e" form used elsewhere in this routine.
                except Exception as e:
                    self.logger.exception(e)
        self.logger.debug("Total time to execute all sites models: %f ms" % (total_time * 1000))
        try:
            self.output_results(output_plugin_directories=output_plugin_dirs,
                                site_model_ensemble=site_model_ensemble,
                                prediction_date=kwargs['begin_date'],
                                prediction_run_date=prediction_testrun_date)
        except Exception as e:
            self.logger.exception(e)
def run_wq_models(self, **kwargs):
    """Run the water-quality models for every sample site and output results.

    kwargs:
      config_file_name -- path to the INI configuration file.
      begin_date -- datetime the models are run for.
      use_logging -- flag passed through to the sampling-date lookup.
    """
    prediction_testrun_date = datetime.now()
    try:
        config_file = ConfigParser.RawConfigParser()
        config_file.read(kwargs['config_file_name'])
        data_collector_plugin_directories = config_file.get(
            'data_collector_plugins', 'plugin_directories').split(',')
        self.collect_data(
            data_collector_plugin_directories=data_collector_plugin_directories)
        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
        xenia_wq_db_file = config_file.get('database', 'name')
        # Move xenia obs db settings into a standalone ini. We can then check
        # the main ini file into source control without exposing login info.
        db_settings_ini = config_file.get('password_protected_configs', 'settings_ini')
        xenia_obs_db_config_file = ConfigParser.RawConfigParser()
        xenia_obs_db_config_file.read(db_settings_ini)
        xenia_obs_db_host = xenia_obs_db_config_file.get(
            'xenia_observation_database', 'host')
        xenia_obs_db_user = xenia_obs_db_config_file.get(
            'xenia_observation_database', 'user')
        xenia_obs_db_password = xenia_obs_db_config_file.get(
            'xenia_observation_database', 'password')
        xenia_obs_db_name = xenia_obs_db_config_file.get(
            'xenia_observation_database', 'database')
        # Fix: output_settings_ini was referenced below (in the
        # check_site_date_for_sampling_date call) but never assigned in this
        # copy of the routine, causing a NameError.
        output_settings_ini = config_file.get('password_protected_configs', 'settings_ini')
        output_plugin_dirs = config_file.get(
            'output_plugins', 'plugin_directories').split(',')
    # "(ConfigParser.Error, Exception)" was redundant; Exception covers both.
    except Exception as e:
        self.logger.exception(e)
    else:
        # Load the sample site information. Has name, location and the
        # boundaries that contain the site.
        mb_sites = mb_sample_sites()
        mb_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)
        # Retrieve the data needed for the models.
        mb_wq_data = mb_wq_model_data(
            xenia_wq_db_name=xenia_wq_db_file,
            xenia_obs_db_type='postgres',
            xenia_obs_db_host=xenia_obs_db_host,
            xenia_obs_db_user=xenia_obs_db_user,
            xenia_obs_db_password=xenia_obs_db_password,
            xenia_obs_db_name=xenia_obs_db_name)
        site_model_ensemble = []
        # First pass we want to get all the data, after that we only need to
        # query the site specific pieces.
        reset_site_specific_data_only = False
        site_data = OrderedDict()
        total_time = 0
        for site in mb_sites:
            try:
                # Get all the models used for the particular sample site.
                model_list = self.build_test_objects(
                    config_file=config_file, site_name=site.name)
                if len(model_list):
                    # Create the container for all the models.
                    site_equations = wqEquations(site.name, model_list, True)
                    # Get the station specific tide stations.
                    tide_station = config_file.get(site.name, 'tide_station')
                else:
                    self.logger.error("No models found for site: %s" % (site.name))
            except Exception as e:
                self.logger.exception(e)
            else:
                try:
                    if len(model_list):
                        mb_wq_data.reset(site=site, tide_station=tide_station)
                        site_data['station_name'] = site.name
                        mb_wq_data.query_data(
                            kwargs['begin_date'], kwargs['begin_date'],
                            site_data, reset_site_specific_data_only)
                        reset_site_specific_data_only = True
                        site_equations.runTests(site_data)
                        total_test_time = sum(
                            testObj.test_time for testObj in site_equations.tests)
                        self.logger.debug(
                            "Site: %s total time to execute models: %f ms"
                            % (site.name, total_test_time * 1000))
                        total_time += total_test_time
                        # Calculate some statistics on the entero results. This
                        # is making an assumption that all the tests we are
                        # running are calculating the same value, the entero
                        # amount.
                        entero_stats = None
                        if len(site_equations.tests):
                            entero_stats = stats()
                            for test in site_equations.tests:
                                if test.mlrResult is not None:
                                    entero_stats.addValue(test.mlrResult)
                            entero_stats.doCalculations()
                        # Check to see if there is a entero sample for our date
                        # as long as the date is not the current date.
                        entero_value = None
                        if datetime.now().date() != kwargs['begin_date'].date():
                            entero_value = check_site_date_for_sampling_date(
                                site.name, kwargs['begin_date'],
                                output_settings_ini, kwargs['use_logging'])
                        # Fix: the looked-up entero_value was discarded — the
                        # dict stored a hard-coded None.
                        site_model_ensemble.append({
                            'metadata': site,
                            'models': site_equations,
                            'entero_value': entero_value,
                            'statistics': entero_stats
                        })
                # Fix: Python 2-only "except Exception, e:" replaced with the
                # "as e" form used elsewhere in this routine.
                except Exception as e:
                    self.logger.exception(e)
        self.logger.debug("Total time to execute all sites models: %f ms"
                          % (total_time * 1000))
        try:
            self.output_results(
                output_plugin_directories=output_plugin_dirs,
                site_model_ensemble=site_model_ensemble,
                prediction_date=kwargs['begin_date'],
                prediction_run_date=prediction_testrun_date)
        except Exception as e:
            self.logger.exception(e)
# NOTE(review): incomplete fragment — this chunk begins inside the try block of
# a function whose "def" line is not visible here (the "except" has no visible
# "try") and ends mid-function ("...the site specific pieces.").  Left
# byte-identical.  It is a third variant of run_wq_models' config-loading
# section; unlike the other copies it does assign output_settings_ini.  Uses
# Python 2-only "except ConfigParser.Error, e" syntax — confirm interpreter.
xenia_obs_db_name = xenia_obs_db_config_file.get( 'xenia_observation_database', 'database') #output results config file. Again split out into individual ini file #for security. output_settings_ini = config_file.get('password_protected_configs', 'settings_ini') output_plugin_dirs = config_file.get('output_plugins', 'plugin_directories').split(',') except ConfigParser.Error, e: if logger: logger.exception(e) else: #Load the sample site information. Has name, location and the boundaries that contain the site. mb_sites = mb_sample_sites() mb_sites.load_sites(file_name=sites_location_file, boundary_file=boundaries_location_file) #Retrieve the data needed for the models. mb_wq_data = mb_wq_model_data( xenia_wq_db_name=xenia_wq_db_file, xenia_obs_db_type='postgres', xenia_obs_db_host=xenia_obs_db_host, xenia_obs_db_user=xenia_obs_db_user, xenia_obs_db_password=xenia_obs_db_password, xenia_obs_db_name=xenia_obs_db_name) site_model_ensemble = [] #First pass we want to get all the data, after that we only need to query #the site specific pieces.
def main():
    """Build per-site historical water-quality CSVs from ETCOC sample files.

    Command line:
      -c/--ConfigFile       INI configuration file.
      -y/--ETCOCDirectory   Directory holding per-site <site>.csv ETCOC files.
      -d/--OutputDirectory  Directory to write <site>_historical.csv files.
    """
    parser = optparse.OptionParser()
    parser.add_option("-c", "--ConfigFile", dest="config_file",
                      help="INI Configuration file.")
    parser.add_option("-y", "--ETCOCDirectory", dest="etcoc_directory", help="")
    parser.add_option('-d', "--OutputDirectory", dest="output_dir", default="", help="")
    (options, args) = parser.parse_args()

    config_file = ConfigParser.RawConfigParser()
    config_file.read(options.config_file)

    # Fix: logger was only assigned inside the "if logConfFile:" branch, so the
    # later "if logger:" checks raised NameError when no logging config was set.
    logger = None
    logConfFile = config_file.get('logging', 'config_file')
    if logConfFile:
        logging.config.fileConfig(logConfFile)
        logger = logging.getLogger(__name__)
        logger.info("Log file opened.")

    if logger:
        logger.info("Building unique sample dates list.")
    sample_sites = mb_sample_sites()
    sample_sites.load_sites(file_name=config_file.get('boundaries_settings', 'sample_sites'),
                            boundary_file=config_file.get('boundaries_settings', 'boundaries_file'))
    tide_data_file = config_file.get('tide_data', 'file')
    historical_obs_db = config_file.get('historical_database', 'name')
    spatialite_lib = config_file.get('database', 'spatiaLiteLib')
    eastern_tz = timezone('US/Eastern')
    utc_tz = timezone('UTC')

    if len(options.output_dir):
        mb_data_obj = mb_wq_historical_data(tide_data_file=tide_data_file,
                                            xenia_database_name=historical_obs_db,
                                            xenia_nexrad_database_name=historical_obs_db,
                                            spatialite_lib=spatialite_lib)
        for sample_site in sample_sites:
            mb_data_obj.reset(site=sample_site)
            write_site_data_file_header = True
            data_file = os.path.join(options.output_dir,
                                     '%s_historical.csv' % (sample_site.name))
            with open(data_file, 'w') as site_data_file:
                fields = ['station', 'date', 'etcoc']
                sample_site_etcoc = os.path.join(options.etcoc_directory,
                                                 "%s.csv" % (sample_site.name))
                with open(sample_site_etcoc, "r") as etcoc_file:
                    etcoc_read = csv.DictReader(etcoc_file, fieldnames=fields)
                    row_id = 0
                    for row in etcoc_read:
                        date_only, time_only = row['date'].split(' ')
                        etcoc_value = row['etcoc']
                        # Sample dates are local (US/Eastern); convert to UTC
                        # for the observation queries.
                        wq_date = eastern_tz.localize(
                            datetime.strptime(date_only, '%Y-%m-%d'))
                        wq_utc_date = wq_date.astimezone(utc_tz)
                        if logger:
                            logger.debug(
                                "Start building historical wq data for: %s Date/Time UTC: %s/EST: %s"
                                % (sample_site.name, wq_utc_date, wq_date))
                        site_data = OrderedDict([
                            ('id', row_id),
                            ('station_name', sample_site.name),
                            ('sample_datetime', wq_date.strftime("%Y-%m-%d %H:%M:%S")),
                            ('sample_datetime_utc', wq_utc_date.strftime("%Y-%m-%d %H:%M:%S")),
                            ('enterococcus_value', etcoc_value)
                        ])
                        try:
                            mb_data_obj.query_data(wq_utc_date, wq_utc_date, site_data)
                        except Exception as e:
                            if logger:
                                logger.exception(e)
                            sys.exit(-1)
                        else:
                            header_buf = []
                            data = []
                            for key in site_data:
                                if write_site_data_file_header:
                                    # Column names cannot start with a digit in
                                    # downstream tools, so rewrite this prefix.
                                    if '2ndavenorth' not in key:
                                        header_buf.append(key)
                                    else:
                                        header_buf.append(
                                            key.replace('2ndavenorth', 'secondavenorth'))
                                if site_data[key] != wq_defines.NO_DATA:
                                    data.append(str(site_data[key]))
                                else:
                                    data.append("")
                            if write_site_data_file_header:
                                site_data_file.write(",".join(header_buf))
                                site_data_file.write('\n')
                                write_site_data_file_header = False
                            site_data_file.write(",".join(data))
                            site_data_file.write('\n')
                            site_data_file.flush()
                            # Fix: the bare "header_buf[:]" / "data[:]"
                            # expressions were no-ops (they build and discard a
                            # copy); both lists are rebuilt each row anyway, so
                            # the dead statements are removed.
                            if logger:
                                logger.debug(
                                    "Finished building historical wq data for: %s Date/Time UTC: %s/EST: %s"
                                    % (sample_site.name, wq_utc_date, wq_date))
                        row_id += 1
def main():
    """Build per-site historical water-quality CSVs from ETCOC sample files.

    Command line:
      -c/--ConfigFile       INI configuration file.
      -y/--ETCOCDirectory   Directory holding per-site <site>.csv ETCOC files.
      -d/--OutputDirectory  Directory to write <site>_historical.csv files.
    """
    parser = optparse.OptionParser()
    parser.add_option("-c", "--ConfigFile", dest="config_file",
                      help="INI Configuration file.")
    parser.add_option("-y", "--ETCOCDirectory", dest="etcoc_directory", help="")
    parser.add_option('-d', "--OutputDirectory", dest="output_dir", default="", help="")
    (options, args) = parser.parse_args()

    config_file = ConfigParser.RawConfigParser()
    config_file.read(options.config_file)

    # Fix: logger was only assigned inside the "if logConfFile:" branch, so the
    # later "if logger:" checks raised NameError when no logging config was set.
    logger = None
    logConfFile = config_file.get('logging', 'config_file')
    if logConfFile:
        logging.config.fileConfig(logConfFile)
        logger = logging.getLogger(__name__)
        logger.info("Log file opened.")

    if logger:
        logger.info("Building unique sample dates list.")
    sample_sites = mb_sample_sites()
    sample_sites.load_sites(file_name=config_file.get('boundaries_settings', 'sample_sites'),
                            boundary_file=config_file.get('boundaries_settings', 'boundaries_file'))
    tide_data_file = config_file.get('tide_data', 'file')
    historical_obs_db = config_file.get('historical_database', 'name')
    spatialite_lib = config_file.get('database', 'spatiaLiteLib')
    eastern_tz = timezone('US/Eastern')
    utc_tz = timezone('UTC')

    if len(options.output_dir):
        mb_data_obj = mb_wq_historical_data(tide_data_file=tide_data_file,
                                            xenia_database_name=historical_obs_db,
                                            xenia_nexrad_database_name=historical_obs_db,
                                            spatialite_lib=spatialite_lib)
        for sample_site in sample_sites:
            mb_data_obj.reset(site=sample_site)
            write_site_data_file_header = True
            data_file = os.path.join(options.output_dir,
                                     '%s_historical.csv' % (sample_site.name))
            with open(data_file, 'w') as site_data_file:
                fields = ['station', 'date', 'etcoc']
                sample_site_etcoc = os.path.join(options.etcoc_directory,
                                                 "%s.csv" % (sample_site.name))
                with open(sample_site_etcoc, "r") as etcoc_file:
                    etcoc_read = csv.DictReader(etcoc_file, fieldnames=fields)
                    row_id = 0
                    for row in etcoc_read:
                        date_only, time_only = row['date'].split(' ')
                        etcoc_value = row['etcoc']
                        # Sample dates are local (US/Eastern); convert to UTC
                        # for the observation queries.
                        wq_date = eastern_tz.localize(
                            datetime.strptime(date_only, '%Y-%m-%d'))
                        wq_utc_date = wq_date.astimezone(utc_tz)
                        if logger:
                            logger.debug(
                                "Start building historical wq data for: %s Date/Time UTC: %s/EST: %s"
                                % (sample_site.name, wq_utc_date, wq_date))
                        site_data = OrderedDict([
                            ('id', row_id),
                            ('station_name', sample_site.name),
                            ('sample_datetime', wq_date.strftime("%Y-%m-%d %H:%M:%S")),
                            ('sample_datetime_utc', wq_utc_date.strftime("%Y-%m-%d %H:%M:%S")),
                            ('enterococcus_value', etcoc_value)
                        ])
                        try:
                            mb_data_obj.query_data(wq_utc_date, wq_utc_date, site_data)
                        # Fix: Python 2-only "except Exception,e:" replaced with
                        # the "as e" form.
                        except Exception as e:
                            if logger:
                                logger.exception(e)
                            sys.exit(-1)
                        else:
                            header_buf = []
                            data = []
                            for key in site_data:
                                if write_site_data_file_header:
                                    # Column names cannot start with a digit in
                                    # downstream tools, so rewrite this prefix.
                                    if '2ndavenorth' not in key:
                                        header_buf.append(key)
                                    else:
                                        header_buf.append(
                                            key.replace('2ndavenorth', 'secondavenorth'))
                                if site_data[key] != wq_defines.NO_DATA:
                                    data.append(str(site_data[key]))
                                else:
                                    data.append("")
                            if write_site_data_file_header:
                                site_data_file.write(",".join(header_buf))
                                site_data_file.write('\n')
                                write_site_data_file_header = False
                            site_data_file.write(",".join(data))
                            site_data_file.write('\n')
                            site_data_file.flush()
                            # Fix: the bare "header_buf[:]" / "data[:]"
                            # expressions were no-ops (they build and discard a
                            # copy); both lists are rebuilt each row anyway, so
                            # the dead statements are removed.
                            if logger:
                                logger.debug(
                                    "Finished building historical wq data for: %s Date/Time UTC: %s/EST: %s"
                                    % (sample_site.name, wq_utc_date, wq_date))
                        row_id += 1