def import_sample_sites(params):
    start_time = time.time()
    init_logging(app)
    area_name = params[0]
    sample_site_csv = params[1]
    boundaries_file = params[2]
    current_app.logger.debug("import_sample_sites started.")

    wq_sites = wq_sample_sites()
    wq_sites.load_sites(file_name=sample_site_csv, boundary_file=boundaries_file)

    row_entry_date = datetime.now()
    area_rec = db.session.query(Project_Area)\
        .filter(Project_Area.area_name == area_name).first()

    # Add the boundaries first.
    for site in wq_sites:
        for contained_by in site.contained_by:
            try:
                bound = Boundary()
                bound.row_entry_date = row_entry_date.strftime('%Y-%m-%d %H:%M:%S')
                bound.project_site_id = area_rec.id
                bound.boundary_name = contained_by.name
                bound.wkb_boundary = contained_by.object_geometry.wkb
                bound.wkt_boundary = contained_by.object_geometry.to_wkt()
                current_app.logger.debug("Adding boundary: %s" % (bound.boundary_name))
                db.session.add(bound)
                db.session.commit()
            except Exception as e:
                current_app.logger.exception(e)
                db.session.rollback()

    # Now add the sample sites, linking each to the boundary records that contain it.
    for site in wq_sites:
        try:
            site_rec = Sample_Site()
            site_rec.project_site_id = area_rec.id
            site_rec.row_entry_date = row_entry_date.strftime('%Y-%m-%d %H:%M:%S')
            site_rec.site_name = site.name
            site_rec.description = site.description
            site_rec.county = site.county
            site_rec.longitude = site.object_geometry.x
            site_rec.latitude = site.object_geometry.y
            site_rec.wkt_location = site.object_geometry.wkt
            site_rec.temporary_site = False
            # Look up boundaries.
            for contained_by in site.contained_by:
                boundary_rec = db.session.query(Boundary)\
                    .filter(Boundary.boundary_name == contained_by.name).first()
                site_rec.boundaries.append(boundary_rec)
            current_app.logger.debug("Adding site: %s" % (site_rec.site_name))
            db.session.add(site_rec)
            db.session.commit()
        except Exception as e:
            current_app.logger.exception(e)
            db.session.rollback()

    current_app.logger.debug("import_sample_sites finished in %f seconds" % (time.time() - start_time))
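# A minimal usage sketch for import_sample_sites: it expects a 3-element sequence of
# (area_name, sample_site_csv, boundaries_file) and assumes a Flask application context
# so current_app and db resolve. The area name and paths below are hypothetical
# placeholders, not values from this project.
#
#   with app.app_context():
#       import_sample_sites(('example_area',
#                            'config/sample_sites.csv',
#                            'config/boundaries.csv'))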
def run(self):
    logger = None
    start_time = time.time()
    try:
        #self.logging_client_cfg['disable_existing_loggers'] = True
        #logging.config.dictConfig(self.logging_client_cfg)
        logging.config.fileConfig(self.log_config)
        logger = logging.getLogger(self.__class__.__name__)
        logger.debug("run started.")
        try:
            wq_sites = wq_sample_sites()
            if wq_sites.load_sites(file_name=self.sample_sites_file,
                                   boundary_file=self.boundaries_file):
                start_year = datetime.now().date().year
                download_historical_sample_data(output_directory=self.source_directory,
                                                url=self.base_url,
                                                start_year=start_year,
                                                end_year=start_year - 1,
                                                logger_name=self.__class__.__name__)
                parse_files(sample_sites=wq_sites,
                            src_data_directory=self.source_directory,
                            output_directory=self.sample_site_directory,
                            logger_name=self.__class__.__name__)
            else:
                logger.error("Failed to load sites file: %s %s" %
                             (self.sample_sites_file, self.boundaries_file))
        except (IOError, Exception) as e:
            if logger is not None:
                logger.exception(e)
    except Exception as e:
        if logger is not None:
            logger.exception(e)
        else:
            traceback.print_exc()
    finally:
        if logger is not None:
            logger.debug("run finished in %f seconds" % (time.time() - start_time))
    return
def run(self):
    start_time = time.time()
    logger = None
    try:
        xenia_db = None
        #self.logging_client_cfg['disable_existing_loggers'] = True
        #logging.config.dictConfig(self.logging_client_cfg)
        logging.config.fileConfig(self.log_config)
        logger = logging.getLogger(self.__class__.__name__)
        logger.setLevel(logging.DEBUG)
        logger.debug("run started.")

        config_file = ConfigParser.RawConfigParser()
        config_file.read(self.plugin_details.get('Settings', 'ini_file'))

        xenia_obs_db_name = config_file.get('database', 'name')
        #xenia_db = wqDB(xenia_obs_db_name, self.__class__.__name__)
        xenia_db = sqliteAlchemy()
        xenia_db.connectDB(databaseType='sqlite',
                           dbHost=xenia_obs_db_name,
                           dbUser=None,
                           dbPwd=None,
                           dbName=None,
                           printSQL=False)

        units_file = config_file.get("units_conversion", "config_file")
        units_conversion = uomconversionFunctions(units_file)

        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
        wq_sites = wq_sample_sites()
        wq_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)

        # Pull the observations for each data source into the xenia observation database.
        for site in ndbc_sites:
            self.get_ndbc_data(site, ndbc_obs, self.begin_date, units_conversion, xenia_db)
        for site in nws_site:
            self.get_nws_data(site, nws_obs, self.begin_date, units_conversion, xenia_db)
        for site in nos_sites:
            self.get_nos_data(site, nos_obs, self.begin_date, units_conversion, xenia_db)
    except Exception as e:
        if logger is not None:
            logger.exception(e)
        else:
            traceback.print_exc()
def main():
    parser = optparse.OptionParser()
    parser.add_option("--ConfigFile", dest="config_file", default=None,
                      help="INI Configuration file.")
    parser.add_option("--OutputDirectory", dest="out_dir", default='./')
    parser.add_option("--ProcessNWSData", dest="process_nws_data", action="store_true",
                      help="If set this will process NWS data, either by querying the server or processing the json files.")
    parser.add_option("--NWSDataFile", dest="nws_file",
                      help="Historical metar file to process.")
    parser.add_option("--ProcessNOSData", dest="process_nos_data", action="store_true",
                      help="If set this will process NOS data, either by querying the server or processing the json files.")
    parser.add_option("--QueryNOS", dest="query_nos", action="store_true",
                      help="If set this will query the NOS web service to get the historical data.")
    parser.add_option("--ProcessNDBCData", dest="process_ndbc_data", action="store_true",
                      help="If set this will process NDBC data, either by querying the server or processing the json files.")
    parser.add_option("--QueryNDBC", dest="query_ndbc", action="store_true",
                      help="If set this will query the NDBC web service to get the historical data.")
    parser.add_option("--ProcessTideData", dest="process_tide_data", action="store_true",
                      help="If set this will process Tide data")
    parser.add_option("--StartDate", dest="starting_date", default=None)
    parser.add_option("--QueryCTD", dest="query_ctd", action="store_true",
                      help="If set this will query the web service to get the historical data.")
    parser.add_option("--ProcessCTDData", dest="process_ctd_data", action="store_true",
                      help="If set this will process CTD data, either by querying the server or processing the json files.")
    (options, args) = parser.parse_args()

    if options.config_file is None:
        parser.print_help()
        sys.exit(-1)

    try:
        config_file = ConfigParser.RawConfigParser()
        config_file.read(options.config_file)
    except Exception as e:
        raise
    else:
        logger = None
        start_date = None
        try:
            logConfFile = config_file.get('logging', 'config_file')
            logging.config.fileConfig(logConfFile)
            logger = logging.getLogger(__name__)
            logger.info("Log file opened.")

            boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
            sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
            sample_data_dir = config_file.get('historic_sample_data', 'directory')
            historical_db_name = config_file.get("database", "name")
            units_file = config_file.get("units_conversion", "config_file")
            if options.starting_date is not None:
                start_date = timezone('US/Eastern').localize(
                    datetime.strptime(options.starting_date, "%Y-%m-%d"))
        except ConfigParser.Error as e:
            if logger:
                logger.exception(e)
        else:
            units_conversion = uomconversionFunctions(units_file)
            historic_db = wqDB(historical_db_name, __name__)
            wq_sites = wq_sample_sites()
            wq_sites.load_sites(file_name=sites_location_file,
                                boundary_file=boundaries_location_file)
            # Get all the sample dates to build data for.
            all_dates = get_dates(data_directory=sample_data_dir,
                                  station_data=wq_sites,
                                  start_date=start_date)
            if options.process_ndbc_data:
                process_ndbc_data(output_directory=options.out_dir,
                                  query_ndbc=options.query_ndbc,
                                  all_dates=all_dates,
                                  db_obj=historic_db,
                                  units_converter=units_conversion)
            if options.process_nos_data:
                process_nos_data(output_directory=options.out_dir,
                                 query_nos=options.query_nos,
                                 all_dates=all_dates,
                                 db_obj=historic_db,
                                 units_converter=units_conversion)
            if options.process_tide_data:
                process_tide_data(output_directory=options.out_dir,
                                  all_dates=all_dates,
                                  log_config_file=logConfFile)
            if options.process_nws_data:
                process_nws_data(output_directory=options.out_dir,
                                 all_dates=all_dates,
                                 db_obj=historic_db,
                                 units_converter=units_conversion,
                                 file_list=options.nws_file.split(","))
            if options.process_ctd_data:
                process_ctd_data(output_directory=options.out_dir,
                                 query_ctd=options.query_ctd,
                                 all_dates=all_dates,
                                 db_obj=historic_db,
                                 units_converter=units_conversion)

            logger.info("Closing log.")

    #historical_wq_data = kdh_historical_wq_data()
    #wq_data = {}
    #historical_wq_data.get_ctd_data(datetime.now(),wq_data )
    return
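# A minimal command-line sketch for this entry point, assuming it is run as a script;
# the script name is a placeholder and the flags simply mirror the optparse options
# defined above:
#
#   python build_historical_data.py --ConfigFile=./config/historical.ini \
#       --OutputDirectory=./output --ProcessNDBCData --QueryNDBC --StartDate=2010-01-01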
def run_wq_models(self, **kwargs):
    prediction_testrun_date = datetime.now()
    try:
        begin_date = kwargs['begin_date']
        config_file = ConfigParser.RawConfigParser()
        config_file.read(kwargs['config_file_name'])

        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
        wq_sites = wq_sample_sites()
        wq_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)

        enable_output_plugins = config_file.getboolean('output_plugins', 'enable_plugins')
        output_plugin_dirs = config_file.get('output_plugins', 'plugin_directories').split(',')

        enable_data_collector_plugins = config_file.getboolean('data_collector_plugins', 'enable_plugins')
        data_collector_plugin_directories = config_file.get('data_collector_plugins', 'plugin_directories').split(',')
    except (ConfigParser.Error, Exception) as e:
        self.logger.exception(e)
    else:
        try:
            # Run any data collector plugins we have.
            if enable_data_collector_plugins:
                self.collect_data(data_collector_plugin_directories=data_collector_plugin_directories,
                                  begin_date=begin_date)

            site_data = OrderedDict()
            total_time = 0
            reset_site_specific_data_only = True
            wq_data = kdh_wq_data(config_file=kwargs['config_file_name'])
            site_model_ensemble = []
            tide_offsets = []
            for site in wq_sites:
                try:
                    # Get all the models used for the particular sample site.
                    model_list = self.build_test_objects(config_file=config_file, site_name=site.name)
                    if len(model_list):
                        # Create the container for all the models.
                        site_equations = wqEquations(site.name, model_list, True)
                    else:
                        self.logger.error("No models found for site: %s" % (site.name))
                except (ConfigParser.Error, Exception) as e:
                    self.logger.exception(e)
                else:
                    try:
                        # Get site specific settings.
                        hycom_data_prefix = config_file.get(site.name, 'hycom_prefix')
                        copernicus_data_prefix = config_file.get(site.name, 'copernicus_prefix')
                        rutgers_data_prefix = config_file.get(site.name, 'rutgers_prefix')
                        rutgers_cell_point = tuple(float(pt) for pt in
                                                   config_file.get(site.name, 'rutgers_cell_loc').split(','))
                        # Get the platforms the site will use.
                        platforms = config_file.get(site.name, 'platforms').split(',')
                        platform_nfo = []
                        for platform in platforms:
                            obs_uoms = config_file.get(platform, 'observation').split(';')
                            obs_uom_nfo = []
                            for nfo in obs_uoms:
                                obs, uom = nfo.split(',')
                                obs_uom_nfo.append({'observation': obs, 'uom': uom})
                            platform_nfo.append({'platform_handle': config_file.get(platform, 'handle'),
                                                 'observations': obs_uom_nfo})

                        offset_tide_station = config_file.get(site.name, 'offset_tide_station')
                        offset_param = "%s_tide_data" % (offset_tide_station)
                        # We use the virtual tide stations since there are no actual stations near the sites.
                        tide_station = config_file.get(site.name, 'tide_station')
                        tide_station_settings = {
                            'tide_station': tide_station,
                            'offset_tide_station': config_file.get(offset_param, 'station_id'),
                            'hi_tide_time_offset': config_file.getint(offset_param, 'hi_tide_time_offset'),
                            'lo_tide_time_offset': config_file.getint(offset_param, 'lo_tide_time_offset'),
                            'hi_tide_height_offset': config_file.getfloat(offset_param, 'hi_tide_height_offset'),
                            'lo_tide_height_offset': config_file.getfloat(offset_param, 'lo_tide_height_offset')
                        }
                        tide_offsets.append(tide_station_settings)
                    except ConfigParser.Error as e:
                        self.logger.exception(e)
                    else:
                        wq_data.reset(site=site,
                                      tide_station=tide_station,
                                      tide_offset_params=tide_offsets,
                                      hycom_prefix=hycom_data_prefix,
                                      copernicus_prefix=copernicus_data_prefix,
                                      start_date=begin_date,
                                      rutgers_cell_point=rutgers_cell_point,
                                      rutgers_prefix=rutgers_data_prefix,
                                      platform_info=platform_nfo)

                        site_data['station_name'] = site.name
                        wq_data.query_data(begin_date, begin_date, site_data)
                        reset_site_specific_data_only = True

                        site_equations.runTests(site_data)
                        total_test_time = sum(testObj.test_time for testObj in site_equations.tests)
                        self.logger.debug("Site: %s total time to execute models: %f ms"
                                          % (site.name, total_test_time * 1000))
                        total_time += total_test_time

                        # Calculate some statistics on the entero results. This is making an assumption
                        # that all the tests we are running are calculating the same value, the entero
                        # amount.
                        entero_stats = None
                        if len(site_equations.tests):
                            entero_stats = stats()
                            for test in site_equations.tests:
                                if test.mlrResult is not None:
                                    entero_stats.addValue(test.mlrResult)
                            entero_stats.doCalculations()

                        site_model_ensemble.append({
                            'metadata': site,
                            'models': site_equations,
                            'statistics': entero_stats,
                            'entero_value': None
                        })

            if enable_output_plugins:
                self.output_results(output_plugin_directories=output_plugin_dirs,
                                    site_model_ensemble=site_model_ensemble,
                                    prediction_date=kwargs['begin_date'],
                                    prediction_run_date=prediction_testrun_date)
        except Exception as e:
            self.logger.exception(e)
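# A hypothetical sketch of the per-site INI fragment the settings lookups above expect.
# Only the option keys come from the code; the section names and values here are
# illustrative placeholders, not the project's actual configuration:
#
#   [example_site]
#   hycom_prefix = example_site_hycom
#   copernicus_prefix = example_site_copernicus
#   rutgers_prefix = example_site_rutgers
#   rutgers_cell_loc = -75.6,36.1
#   platforms = example_platform
#   tide_station = 1234567
#   offset_tide_station = example_offset
#
#   [example_offset_tide_data]
#   station_id = 7654321
#   hi_tide_time_offset = 30
#   lo_tide_time_offset = 30
#   hi_tide_height_offset = 1.0
#   lo_tide_height_offset = 1.0
#
#   [example_platform]
#   handle = org.example_platform.fixed
#   observation = water_temperature,celsius;salinity,psu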
def main():
    parser = optparse.OptionParser()
    parser.add_option("--ConfigFile", dest="config_file", default=None,
                      help="INI Configuration file.")
    parser.add_option("--OutputDirectory", dest="output_dir", default=None,
                      help="Directory to save the historical data site files.")
    (options, args) = parser.parse_args()

    try:
        config_file = ConfigParser.RawConfigParser()
        config_file.read(options.config_file)

        logConfFile = config_file.get('logging', 'config_file')
        logging.config.fileConfig(logConfFile)
        logger = logging.getLogger('build_historical_logger')
        logger.info("Log file opened.")

        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
        wq_historical_db = config_file.get('database', 'name')
    except ConfigParser.Error as e:
        import traceback
        traceback.print_exc()
        sys.exit(-1)
    else:
        # Load the sample site information. Has name, location and the boundaries that contain the site.
        wq_sites = wq_sample_sites()
        wq_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)

        wq_historical_data = folly_historical_wq_data(config_file=options.config_file)

        sample_data_directory = '/Users/danramage/Documents/workspace/WaterQuality/FollyBeach-WaterQuality/data/sample_data'
        historical_sample_files = os.listdir(sample_data_directory)

        #start_date = timezone('UTC').localize(datetime.strptime('2005-01-01 00:00:00', '%Y-%m-%d %H:%M:%S'))
        utc_tz = timezone('UTC')
        est_tz = timezone('US/Eastern')
        data_start_date = utc_tz.localize(datetime.strptime('2005-01-01 00:00:00', '%Y-%m-%d %H:%M:%S'))

        for site in wq_sites:
            out_file = os.path.join(options.output_dir, "%s_historical_data.csv" % (site.name))
            write_header = True
            with open(out_file, 'w') as site_data_file:
                try:
                    # Get the station specific tide stations.
                    tide_station = config_file.get(site.description, 'tide_station')
                    offset_tide_station = config_file.get(site.description, 'offset_tide_station')
                    offset_key = "%s_tide_data" % (offset_tide_station)
                    tide_offset_settings = {
                        'tide_station': config_file.get(offset_key, 'station_id'),
                        'hi_tide_time_offset': config_file.getint(offset_key, 'hi_tide_time_offset'),
                        'lo_tide_time_offset': config_file.getint(offset_key, 'lo_tide_time_offset'),
                        'hi_tide_height_offset': config_file.getfloat(offset_key, 'hi_tide_height_offset'),
                        'lo_tide_height_offset': config_file.getfloat(offset_key, 'lo_tide_height_offset')
                    }
                    # Get the platforms the site will use.
                    platforms = config_file.get(site.description, 'platforms').split(',')
                    platform_nfo = []
                    for platform in platforms:
                        obs_uoms = config_file.get(platform, 'observation').split(';')
                        obs_uom_nfo = []
                        for nfo in obs_uoms:
                            obs, uom = nfo.split(',')
                            obs_uom_nfo.append({'observation': obs, 'uom': uom})
                        platform_nfo.append({'platform_handle': config_file.get(platform, 'handle'),
                                             'observations': obs_uom_nfo})
                except ConfigParser.Error as e:
                    if logger:
                        logger.exception(e)

                file_name = site.name
                for file in historical_sample_files:
                    if file.find(file_name) != -1:
                        samples_collection = wq_samples_collection()
                        full_path = os.path.join(sample_data_directory, file)
                        parse_file(data_file=full_path,
                                   samples_collection=samples_collection,
                                   start_date=data_start_date)
                        try:
                            sample_recs = samples_collection[site.name]
                        except (KeyError, Exception) as e:
                            logger.exception(e)
                        else:
                            sample_recs.sort(key=lambda x: x.date_time, reverse=False)
                            auto_num = 1
                            for sample_data in sample_recs:
                                start_date = sample_data.date_time
                                try:
                                    wq_date_time_local = sample_data.date_time.astimezone(est_tz)
                                    site_data = OrderedDict([
                                        ('autonumber', auto_num),
                                        ('station_name', site.name),
                                        ('station_desc', site.description),
                                        ('sample_datetime', wq_date_time_local),
                                        ('sample_datetime_utc', sample_data.date_time),
                                        ('enterococcus_value', sample_data.value),
                                    ])
                                    wq_historical_data.reset(site=site,
                                                             tide_station=tide_station,
                                                             tide_offset_params=tide_offset_settings,
                                                             start_date=sample_data.date_time,
                                                             platform_info=platform_nfo)
                                    wq_historical_data.query_data(sample_data.date_time,
                                                                  sample_data.date_time,
                                                                  site_data)
                                    header_buf = []
                                    data = []
                                    for key in site_data:
                                        if write_header:
                                            header_buf.append(key)
                                        if site_data[key] != wq_defines.NO_DATA:
                                            data.append(str(site_data[key]))
                                        else:
                                            data.append("")
                                    if write_header:
                                        site_data_file.write(",".join(header_buf))
                                        site_data_file.write('\n')
                                        del header_buf[:]
                                        write_header = False
                                    site_data_file.write(",".join(data))
                                    site_data_file.write('\n')
                                    site_data_file.flush()
                                    del data[:]
                                    auto_num += 1
                                except Exception as e:
                                    if logger:
                                        logger.exception(e)
                                    sys.exit(-1)
    """
    site_data = OrderedDict([('autonumber', 1),
                             ('station_name', row['SPLocation']),
                             ('sample_datetime', wq_date.strftime("%Y-%m-%d %H:%M:%S")),
                             ('sample_datetime_utc', wq_utc_date.strftime("%Y-%m-%d %H:%M:%S")),
                             ('County', row['County']),
                             ('enterococcus_value', row['enterococcus']),
                             ('enterococcus_code', row['enterococcus_code'])])
    """
    return
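# A hedged sketch of the per-site CSV written above: the header row is the site_data keys
# in insertion order, starting with the fields built here, followed by whatever columns
# query_data() appends for that site. The data row values are illustrative only:
#
#   autonumber,station_name,station_desc,sample_datetime,sample_datetime_utc,enterococcus_value,...
#   1,example_site,Example Site,2005-06-01 10:30:00-04:00,2005-06-01 14:30:00+00:00,12.0,...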
    sys.exit(-1)

try:
    boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
    sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
    with open(options.worksheet_to_site, "r") as worksheet_site_file:
        worksheet_name_to_sample_site_mapping = json.load(worksheet_site_file)
except (ConfigParser.Error, Exception) as e:
    if logger:
        logger.exception(e)
else:
    # Load the sample site information. Has name, location and the boundaries that contain the site.
    wq_sites = wq_sample_sites()
    wq_sites.load_sites(file_name=sites_location_file,
                        boundary_file=boundaries_location_file)

    # Build watershed groups.
    watersheds = {}
    for site in wq_sites:
        if site.contained_by[0] is not None:
            if site.contained_by[0].name.lower() not in watersheds:
                watersheds[site.contained_by[0].name.lower()] = []
            watersheds[site.contained_by[0].name.lower()].append(site.name)

    if len(options.model_csv_dir):
        model_csv_list = glob.glob('%s/*.csv' % (options.model_csv_dir))
        start_line = 1
        header_row = ["Location", "Site", "Equation"]
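# A hedged sketch of the watersheds mapping built above: keys are the lower-cased name of
# each site's first containing boundary, values are the names of the sites it contains.
# The names shown are illustrative placeholders:
#
#   {'example watershed': ['site_1', 'site_2'], 'other watershed': ['site_3']}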
def run_wq_models(self, **kwargs):
    prediction_testrun_date = datetime.now()
    try:
        config_file = ConfigParser.RawConfigParser()
        config_file.read(kwargs['config_file_name'])

        data_collector_plugin_directories = config_file.get('data_collector_plugins', 'plugin_directories')
        enable_data_collector_plugins = config_file.getboolean('data_collector_plugins', 'enable_plugins')
        if enable_data_collector_plugins and len(data_collector_plugin_directories):
            data_collector_plugin_directories = data_collector_plugin_directories.split(',')
            self.collect_data(data_collector_plugin_directories=data_collector_plugin_directories)

        boundaries_location_file = config_file.get('boundaries_settings', 'boundaries_file')
        sites_location_file = config_file.get('boundaries_settings', 'sample_sites')
        units_file = config_file.get('units_conversion', 'config_file')

        output_plugin_dirs = config_file.get('output_plugins', 'plugin_directories').split(',')
        enable_output_plugins = config_file.getboolean('output_plugins', 'enable_plugins')

        xenia_nexrad_db_file = config_file.get('database', 'name')

        # Move the xenia obs db settings into a standalone ini. We can then
        # check the main ini file into source control without exposing login info.
        db_settings_ini = config_file.get('password_protected_configs', 'settings_ini')
        xenia_obs_db_config_file = ConfigParser.RawConfigParser()
        xenia_obs_db_config_file.read(db_settings_ini)

        xenia_obs_db_host = xenia_obs_db_config_file.get('xenia_observation_database', 'host')
        xenia_obs_db_user = xenia_obs_db_config_file.get('xenia_observation_database', 'user')
        xenia_obs_db_password = xenia_obs_db_config_file.get('xenia_observation_database', 'password')
        xenia_obs_db_name = xenia_obs_db_config_file.get('xenia_observation_database', 'database')
    except (ConfigParser.Error, Exception) as e:
        self.logger.exception(e)
    else:
        # Load the sample site information. Has name, location and the boundaries that contain the site.
        wq_sites = wq_sample_sites()
        wq_sites.load_sites(file_name=sites_location_file,
                            boundary_file=boundaries_location_file)

        # First pass we want to get all the data, after that we only need to query
        # the site specific pieces.
        reset_site_specific_data_only = False
        site_data = OrderedDict()
        total_time = 0
        site_model_ensemble = []
        for site in wq_sites:
            try:
                # Get all the models used for the particular sample site.
                model_list = self.build_test_objects(config_file=config_file, site_name=site.name)
                if len(model_list) == 0:
                    self.logger.error("No models found for site: %s" % (site.name))
            except (ConfigParser.Error, Exception) as e:
                self.logger.exception(e)
            else:
                try:
                    if len(model_list):
                        site_models = xgb_ensemble(site, model_list)
                        for model in model_list:
                            self.logger.debug("Site: %s Model: %s starting prediction" % (site.name, model.name))
                            site_data = OrderedDict()
                            model_data_dir = config_file.get(site.name, 'data_directory')
                            wq_data = follybeach_wq_data(xenia_nexrad_db_name=xenia_nexrad_db_file,
                                                         xenia_obs_db_type='postgres',
                                                         xenia_obs_db_host=xenia_obs_db_host,
                                                         xenia_obs_db_user=xenia_obs_db_user,
                                                         xenia_obs_db_password=xenia_obs_db_password,
                                                         xenia_obs_db_name=xenia_obs_db_name,
                                                         units_file=units_file)
                            wq_data.reset(site=site,
                                          tide_station_settings=model.tide_settings,
                                          platform_info=model.platform_settings)
                            #site_data['station_name'] = site.name
                            wq_data.query_data(kwargs['begin_date'],
                                               kwargs['begin_date'],
                                               site_data,
                                               reset_site_specific_data_only)
                            # Save the model input data to a csv file.
                            data_file = self.save_model_data(site.name,
                                                             model.name,
                                                             model_data_dir,
                                                             site_data,
                                                             kwargs['begin_date'])
                            model.runTest(site_data)

                        total_test_time = sum(xgb_model.test_time for xgb_model in model_list)
                        self.logger.debug("Site: %s total time to execute models: %f ms"
                                          % (site.name, total_test_time * 1000))
                        total_time += total_test_time

                        site_models.overall_prediction()
                        site_model_ensemble.append({
                            'metadata': site,
                            'models': site_models,
                            'statistics': None,
                            'entero_value': None
                        })
                        '''
                        #Calculate some statistics on the entero results. This is making an assumption
                        #that all the tests we are running are calculating the same value, the entero
                        #amount.
                        entero_stats = None
                        if len(site_equations.tests):
                            entero_stats = stats()
                            for test in site_equations.tests:
                                if test.mlrResult is not None:
                                    entero_stats.addValue(test.mlrResult)
                            entero_stats.doCalculations()
                        '''
                except Exception as e:
                    self.logger.exception(e)

        self.logger.debug("Total time to execute all sites models: %f ms" % (total_time * 1000))
        try:
            if enable_output_plugins:
                self.output_results(output_plugin_directories=output_plugin_dirs,
                                    site_model_ensemble=site_model_ensemble,
                                    prediction_date=kwargs['begin_date'],
                                    prediction_run_date=prediction_testrun_date)
        except Exception as e:
            self.logger.exception(e)
    return
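# For reference, a hedged sketch of one site_model_ensemble entry handed to output_results
# above; the comments describe the values as used in this module, with 'statistics' left
# None here (the wqEquations-based variant fills it with entero statistics instead):
#
#   {
#       'metadata': site,        # the wq_sample_sites site object (name, geometry, boundaries)
#       'models': site_models,   # the xgb_ensemble holding each model's runTest result
#       'statistics': None,
#       'entero_value': None
#   }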