def setUp(self):
    self.series_maker = NetCDFSeriesMaker(system_config={}, max_parallelism=1)
    self.default_varnames = {
        'time': 'time',
        'x': 'x',
        'y': 'y',
        'scen': None
    }
    self.default_dimensions = ['time', 'x', 'y', 'scen']

    if not os.path.exists('test/data/.tmp'):
        os.makedirs('test/data/.tmp')

    self.fake_forecast = DotDict({
        'paths': {
            'weather_grid_path': 'test/data/.tmp'
        },
        'configuration': {
            'netcdf_variables': self.default_varnames,
            'grid_resolution': 10
        }
    })
    self.test_location = DotDict({
        'netcdf_x': 5127500,
        'netcdf_y': 5907500,
        'x_coord': -64.178711,
        'y_coord': -36.910294,
        'weather_station': 1,
        'name': 'Zone 1'
    })
def test_updateDict(self):
    d = DotDict()
    d.prop1 = 1
    d['prop2'] = 2
    self.assertEqual(d.prop1, d['prop1'])
    self.assertEqual(d['prop2'], 2)
    self.assertEqual(d.get('prop2'), d.prop2)
def public_view(self):
    view = DotDict(copy.copy(self.__dict__))
    del view['database']
    del view['database_config']
    del view['forecasts']
    del view['logger']
    del view['observer']
    del view['alias_dict']
    del view['jobs_lock']
    del view['system_config_yaml']
    del view['forecasts_files']
    del view['forecasts_loader']
    return view.to_json()
def public_view(self, forecasts_paths):
    view = DotDict(copy.deepcopy(self.__dict__))
    del view['results']
    del view['simulations']
    view['campaign_name'] = self.campaign_name
    view['forecast_date'] = self.forecast_date
    view['forecast_id'] = self.id

    if 'rainfall' in view:
        del view['rainfall']

    view['locations'] = [loc['name'] for loc in self.locations.values()]
    view['file_name'] = self.public_file_name(forecasts_paths)
    return view.to_json()
def __load_file__(self, forecast_file):
    forecasts = []
    try:
        with open(forecast_file) as yaml_file:
            forecast = DotDict(yaml.safe_load(yaml_file))
        forecast['file_name'] = forecast_file

        weather_series = forecast['configuration']['weather_series']
        if weather_series not in self.weather_series_makers:
            raise RuntimeError('Weather series of type %s not supported.' % weather_series)
        forecast.configuration.weather_maker_class = self.weather_series_makers[weather_series]

        for loc_key in list(forecast['locations'].keys()):
            forecast['locations'][loc_key] = Location(forecast['locations'][loc_key], forecast,
                                                      self.system_config)

        builder = ForecastBuilder(forecast, self.system_config)
        builder.replace_aliases(self.system_config.alias_dict)
        builder.inherit_config(self.system_config.system_config_yaml)

        # Build and append forecasts.
        for f in builder.build():
            forecasts.append(f)
            self.system_config.weather_stations_ids.update(
                loc['weather_station'] for loc in f.locations.values())

        self.system_config.forecasts[forecast_file] = forecasts
        logging.getLogger().info('Loaded %d forecasts from file "%s".' % (len(forecasts), forecast_file))
        return True
    except Exception:
        logging.getLogger().error("Skipping forecast file '%s'. Reason: %s." %
                                  (forecast_file, log_format_exception()))
        return False
def init_from_yaml(self, yaml):
    if 'name' not in yaml:
        yaml['name'] = filename_without_ext(yaml['file_name'])

    if 'forecast_date' not in yaml:
        # If there's no forecast date defined, we explicitly define it as None.
        yaml['forecast_date'] = None

    if 'results' not in yaml:
        raise RuntimeError('Missing results property in forecast file %s.' % yaml['file_name'])

    if 'daily' not in yaml['results']:
        yaml['results']['daily'] = []
    if 'cycle' not in yaml['results']:
        yaml['results']['cycle'] = []

    if len(yaml['results']['cycle']) == 0 and len(yaml['results']['daily']) == 0:
        raise RuntimeError('No expected results variables were provided in forecast file %s.' %
                           yaml['file_name'])

    # Keys that belong to simulation objects and must be deleted from a Forecast object.
    simulation_keys = ['initial_conditions', 'agronomic_management', 'site_characteristics']
    for key in simulation_keys:
        if key in yaml:
            del yaml[key]

    self.__dict__.update(DotDict(yaml))
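# Hedged usage sketch (standalone, not part of the class above): the minimal
# 'results' block init_from_yaml accepts. The file contents below are
# hypothetical example values; HWAM is the cycle variable the rest of the
# pipeline checks for. At least one of 'cycle' or 'daily' must be non-empty.
import yaml

_example_forecast = yaml.safe_load("""
file_name: example_forecast.yaml
results:
  cycle: [HWAM]
  daily: []
""")
assert _example_forecast['results']['cycle'] or _example_forecast['results']['daily']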
def build(self):
    # type(self.forecast_file) = DotDict
    forecast = self.forecast_file
    forecast_list = []
    simulations = DotDict()

    # Join site_characteristics, initial_conditions and agronomic_managements by location.
    joined_locations = DotDict()
    for loc_key, values in forecast['locations'].items():
        joined_loc = DotDict()

        if loc_key not in forecast.site_characteristics:
            raise RuntimeError('Missing site characteristics for location "%s".' % loc_key)
        joined_loc.update(values)
        joined_loc.soils = forecast.site_characteristics[loc_key]

        if loc_key not in forecast.initial_conditions:
            raise RuntimeError('Missing initial conditions for location "%s".' % loc_key)
        if loc_key not in forecast.agronomic_management:
            raise RuntimeError('Missing agronomic management for location "%s".' % loc_key)

        for soil_key in joined_loc.soils:
            if soil_key not in forecast.initial_conditions[loc_key]:
                raise RuntimeError('Missing initial conditions for soil "%s" at location "%s".' %
                                   (soil_key, loc_key))
            joined_loc.soils[soil_key]['initial_conditions'] = \
                forecast.initial_conditions[loc_key][soil_key]

            if soil_key not in forecast.agronomic_management[loc_key]:
                raise RuntimeError('Missing agronomic management for soil "%s" at location "%s".' %
                                   (soil_key, loc_key))
            joined_loc.soils[soil_key]['agronomic_management'] = \
                forecast.agronomic_management[loc_key][soil_key]

        joined_locations[loc_key] = joined_loc

    # Create simulations based on complex fields (those that allow more than one value).
    for loc_key, loc in joined_locations.items():
        simulations[loc_key] = []

        # Unwind managements.
        for soil_key, soil in loc.soils.items():
            for mgmt_key, management in soil.agronomic_management.items():
                sim = DotDict({
                    'location': copy.deepcopy(loc),
                    'soil': copy.deepcopy(soil),
                    'management': copy.deepcopy(management),
                    'initial_conditions': copy.deepcopy(soil['initial_conditions'])
                })
                del sim['soil']['agronomic_management']
                del sim['soil']['initial_conditions']
                del sim['location']['soils']

                sim.name = loc.name
                sim.name += ' - Soil: "%s"' % soil.id
                if 'mgmt_name' not in management:
                    management.mgmt_name = mgmt_key
                sim.name += ' - Mgmt: "%s"' % management.mgmt_name

                ic_water_var = None
                ic_water_var_content = None
                if 'ich20_frac' in sim.initial_conditions:
                    ic_water_var = 'ich20_frac'
                    ic_water_var_content = sim.initial_conditions['ich20_frac']
                if 'frac_full' in sim.initial_conditions:
                    if ic_water_var:
                        raise RuntimeError("Water content can't be defined with two variables.")
                    ic_water_var = 'frac_full'
                    ic_water_var_content = sim.initial_conditions['frac_full']

                if not ic_water_var_content:
                    raise RuntimeError("Water initial conditions can't be None.")

                # Unwind initial conditions.
                if isinstance(ic_water_var_content, DotDict):
                    for ic_name, ic_values in ic_water_var_content.items():
                        new_sim = copy.deepcopy(sim)
                        new_sim.initial_conditions[ic_water_var] = ic_values
                        new_sim.name += ' - IC: "%s=%s"' % (ic_water_var, ic_name)
                        new_sim.water_content = ic_name

                        if self.simulation_schema:
                            validate(json.loads(new_sim.to_json()), schema=self.simulation_schema)
                        simulations[loc_key].append(Simulation(new_sim, forecast.crop_type))
                else:
                    if self.simulation_schema:
                        validate(json.loads(sim.to_json()), schema=self.simulation_schema)
                    sim.water_content = ''
                    simulations[loc_key].append(Simulation(sim, forecast.crop_type))

    # Unwind dates and create a forecast for each different date.
    if ('forecast_date' in forecast) and isinstance(forecast['forecast_date'], list):
        for date in forecast['forecast_date']:
            sims = copy.deepcopy(simulations)
            if not isinstance(date, str):
                raise RuntimeError('Forecast date must be a string, found: %s (%s).' %
                                   (type(date), date))
            f = Forecast(forecast, sims)
            f.forecast_date = date
            forecast_list.append(f)
    else:
        f = Forecast(forecast, simulations)
        forecast_list.append(f)

    return forecast_list
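# Illustrative sketch of the date-unwinding rule at the end of build(): a
# list-valued 'forecast_date' yields one forecast per date, each with its own
# deep copy of the simulations, while a scalar date yields a single forecast.
# Plain tuples and dicts stand in here for the real Forecast/DotDict objects.
import copy

def _unwind_dates(forecast_date, simulations):
    if isinstance(forecast_date, list):
        return [(date, copy.deepcopy(simulations)) for date in forecast_date]
    return [(forecast_date, simulations)]

assert len(_unwind_dates(['2015-01-01', '2015-01-15'], {'loc1': []})) == 2
assert len(_unwind_dates('2015-01-01', {'loc1': []})) == 1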
def __init__(self, system_config):
    self.system_config = system_config
    self.wth_db = None
    self.weather_stations_ids = set()
    self.wth_max_date = DotDict()
def load(config_object):
    # Load the system configuration from the YAML file and update this object's dictionary with the keys
    # found there, allowing us to access them using dot notation (e.g. "config.temp_folder" instead of
    # "config.get('temp_folder')", though the latter is also supported).
    with open(config_object.system_config_path) as config_file:
        system_config_yaml = DotDict(yaml.safe_load(config_file))

    if 'max_parallelism' in system_config_yaml:
        max_parallelism = system_config_yaml['max_parallelism']
        if (not isinstance(max_parallelism, int)) or (max_parallelism < 1):
            raise RuntimeError('Invalid max_parallelism value (%s).' % max_parallelism)

    # This will be used by forecasts to inherit the global config.
    config_object.system_config_yaml = system_config_yaml

    # Update this class' __dict__ property to add the properties defined in the config YAML.
    config_object.update(system_config_yaml)

    # Create the system's job syncing lock if it doesn't exist.
    if not config_object.jobs_lock:
        config_object.jobs_lock = JobsLock(max_parallel_tasks=config_object.max_parallelism)
    else:
        # Otherwise, just update the maximum amount of concurrent parallel jobs. If we reinstantiated
        # the lock, every thread currently waiting on it would be blocked permanently.
        config_object.jobs_lock.max_concurrent_readers = config_object.max_parallelism

    # Load database configurations and open connections.
    with open(config_object.databases_config_path) as db_config_file:
        db_config = yaml.safe_load(db_config_file)

    config_object['database_config'] = DotDict()
    config_object['database'] = DotDict()

    for db_conn, properties in db_config.items():
        if 'type' not in properties:
            raise RuntimeError('Missing database type for database connection "%s".' % db_conn)

        properties['name'] = db_conn

        if properties['type'] == 'postgresql':
            connection = DatabaseUtils.connect_postgresql(properties, config_object.config_path)
        elif properties['type'] == 'mongodb':
            connection = DatabaseUtils.connect_mongodb(properties, config_object.config_path)
        else:
            raise RuntimeError('Unsupported database type: "%s".' % properties['type'])

        # Store the connection config dictionary and the connection instance.
        config_object['database_config'][db_conn] = properties
        config_object.database[db_conn] = connection

    wth_db_checker = CheckWeatherDB(system_config=config_object)
    wth_db_checker.start()

    rinde_db_checker = CheckYieldDB(system_config=config_object)
    rinde_db_checker.start()

    config_object.alias_dict = None
    if config_object.alias_keys_path:
        with open(config_object.alias_keys_path, 'r') as alias_file:
            config_object.alias_dict = yaml.load(alias_file, Loader=yaml.FullLoader)

    # Load forecasts.
    if not config_object.forecasts:
        config_object.forecasts = {}

    forecast_file_list = listdir_fullpath(
        config_object.forecasts_path,
        onlyFiles=True,
        recursive=True,
        filter=(lambda x: x.endswith('yaml')))

    if not config_object.forecasts_files:
        config_object.forecasts_files = forecast_file_list
    else:
        # If this property was already initialized, keep only the files that weren't already tracked,
        # so the caller receives just the new ones.
        forecast_file_list = [f for f in forecast_file_list
                              if f not in config_object.forecasts_files]

    return forecast_file_list
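# Small sketch of the new-files rule at the end of load(): on a reload, only
# forecast files that aren't already tracked are returned to the caller. The
# paths below are hypothetical.
_found = ['/opt/forecasts/a.yaml', '/opt/forecasts/b.yaml']
_tracked = ['/opt/forecasts/a.yaml']
assert [f for f in _found if f not in _tracked] == ['/opt/forecasts/b.yaml']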
def run_forecast(self, yield_forecast, priority=RUN_FORECAST, progress_monitor=None):
    forecast_full_name = '%s (%s)' % (yield_forecast.name, yield_forecast.forecast_date)
    logging.getLogger().info('Running forecast "%s".' % forecast_full_name)

    psims_exit_code = None
    db = None
    forecast_id = None
    simulations_ids = None
    exception_raised = False

    if not progress_monitor:
        progress_monitor = NullMonitor

    progress_monitor.end_value = 5
    progress_monitor.job_started()
    progress_monitor.update_progress(job_status=JOB_STATUS_WAITING)

    with self.system_config.jobs_lock.blocking_job(priority=priority):
        # Lock acquired.
        progress_monitor.update_progress(job_status=JOB_STATUS_RUNNING)

        forecast = copy.deepcopy(yield_forecast)
        try:
            run_start_time = datetime.now()

            # Get the MongoDB connection.
            db = self.system_config.database['yield_db']

            # Add database connection information to the forecast config to use it when writing the
            # pSIMS params file.
            forecast.configuration.database = DotDict({
                'name': db.name,
                'host': db.client.HOST,
                'port': db.client.PORT
            })

            forecast.configuration.weather_maker_class = \
                ForecastLoader.weather_series_makers[forecast.configuration.weather_series]

            # Create an instance of the weather series maker.
            wth_series_maker = forecast.configuration.weather_maker_class(
                self.system_config, forecast.configuration.max_parallelism)

            # The simulations collection can be defined by the user in the YAML file.
            if 'simulation_collection' not in forecast.configuration:
                # If it's not defined, base the decision of which one to use on the type of weather
                # series the forecast will use.
                forecast.configuration['simulation_collection'] = 'simulations'
                if forecast.configuration.weather_series == 'historic':
                    forecast.configuration['simulation_collection'] = 'reference_simulations'
                if forecast.configuration.weather_series == 'netcdf':
                    forecast.configuration['simulation_collection'] = 'netcdf_simulations'

            if forecast.configuration['simulation_collection'] not in db.collection_names():
                raise RuntimeError('The specified collection (%s) does not exist in the results database.' %
                                   forecast.configuration['simulation_collection'])

            folder_name = "%s" % (datetime.now().isoformat())
            folder_name = folder_name.replace('"', '').replace('\'', '').replace(' ', '_')
            forecast.folder_name = folder_name

            # Add the folder name to the rundir and create it.
            forecast.paths.rundir = os.path.abspath(os.path.join(forecast.paths.rundir, folder_name))
            create_folder_with_permissions(forecast.paths.rundir)

            # Create a folder for the weather grid inside that rundir.
            forecast.paths.weather_grid_path = os.path.join(forecast.paths.rundir, 'wth')
            create_folder_with_permissions(forecast.paths.weather_grid_path)

            # Create a folder for the soil grid inside that rundir.
            forecast.paths.soil_grid_path = os.path.join(forecast.paths.rundir, 'soils')
            create_folder_with_permissions(forecast.paths.soil_grid_path)

            # Create the folder where we'll read the CSV files created by the database.
            forecast.paths.wth_csv_read = os.path.join(forecast.paths.wth_csv_read, folder_name)
            forecast.paths.wth_csv_export = os.path.join(forecast.paths.wth_csv_export, folder_name)
            create_folder_with_permissions(forecast.paths.wth_csv_read)

            active_threads = dict()

            forecast.weather_stations = {}
            forecast.rainfall = {}

            stations_not_updated = set()
            if forecast.forecast_date is None:
                run_date = datetime.now().date()
            else:
                run_date = datetime.strptime(forecast.forecast_date, '%Y-%m-%d').date()

            for loc_key, location in forecast['locations'].items():
                omm_id = location['weather_station']

                # Upsert the location.
                db.locations.update_one(
                    {'_id': location.id},
                    {'$set': location.persistent_view()},
                    upsert=True)

                # If this forecast is creating weather files from the weather database, check that the
                # station associated with each location is currently updated.
                if issubclass(wth_series_maker.__class__, DatabaseWeatherSeries):
                    if omm_id not in self.weather_updater.wth_max_date:
                        # Since the system only updates weather info for the stations that are currently
                        # being used, it may happen that the requested station is not in the weather
                        # updater's max dates dict.
                        self.weather_updater.add_weather_station_id(omm_id)
                        stations_not_updated.add(omm_id)
                        continue
                    elif not isinstance(wth_series_maker, HistoricalSeriesMaker) and \
                            self.weather_updater.wth_max_date[omm_id] < run_date:
                        # If the forecast date is greater than the max date of climate data for this
                        # station, we add it to the not-updated set.
                        stations_not_updated.add(omm_id)
                        continue

                if omm_id not in active_threads:
                    # Weather station data is updated, the forecast can be run.
                    active_threads[omm_id] = threading.Thread(
                        target=wth_series_maker.create_series,
                        name='create_series for omm_id = %s' % omm_id,
                        args=(location, forecast))
                else:
                    # The weather station already has an associated thread that will create the series.
                    continue

            if len(stations_not_updated) > 0:
                # The forecast can't continue, it must be rescheduled.
                logging.warning("Couldn't run forecast \"%s\" because the following weather stations "
                                "don't have updated data: %s." %
                                (forecast_full_name, list(stations_not_updated)))
                self.reschedule_forecast(forecast)
                return 0

            progress_monitor.update_progress(new_value=1)

            weather_series_monitor = ProgressMonitor(end_value=len(active_threads))
            progress_monitor.add_subjob(
                weather_series_monitor,
                job_name='Create weather series (%s)' %
                         forecast.configuration.weather_maker_class.__name__)
            joined_threads_count = 0

            # Start all weather maker threads.
            for t in active_threads.values():
                t.start()

            # Wait for the weather grid to be populated.
            for t in active_threads.values():
                t.join()
                joined_threads_count += 1
                weather_series_monitor.update_progress(joined_threads_count)

            weather_series_monitor.job_ended()
            progress_monitor.update_progress(new_value=2)

            # If the folder is empty, delete it.
            if os.path.exists(forecast.paths.wth_csv_read) and \
                    len(os.listdir(forecast.paths.wth_csv_read)) == 0:
                # This folder is used only by the classes in core.modules.simulations_manager.weather.csv.
                # The rest of the weather series makers use in-memory series creation.
                shutil.rmtree(forecast.paths.wth_csv_read)

            forecast_persistent_view = forecast.persistent_view()
            is_reference_forecast = True
            if forecast_persistent_view:
                is_reference_forecast = False
                forecast_id = db.forecasts.insert_one(forecast_persistent_view).inserted_id
                if not forecast_id:
                    raise RuntimeError('Failed to insert forecast with id: %s' %
                                       forecast_persistent_view['_id'])

            simulations_ids = []
            reference_ids = []

            # Flatten simulations and update the location info (with id's and computed weather stations).
            for loc_key, loc_simulations in forecast.simulations.items():
                for sim in loc_simulations:
                    sim.location = forecast.locations[loc_key]
                    sim.weather_station = forecast.weather_stations[sim.location.weather_station]

                    # If a simulation has an associated forecast, fill the associated fields.
                    if forecast_id:
                        sim.forecast_id = forecast_id
                        sim.forecast_date = forecast.forecast_date

                    reference_ids.append(sim.reference_id)

                    sim_id = db[forecast.configuration['simulation_collection']].insert_one(
                        sim.persistent_view()).inserted_id
                    sim['_id'] = sim_id
                    simulations_ids.append(sim_id)

            if not is_reference_forecast:
                # Find which simulations have a reference simulation associated.
                found_reference_simulations = db.reference_simulations.find(
                    {'_id': {'$in': reference_ids}},
                    projection=['_id'])
                found_reference_simulations = set(s['_id'] for s in found_reference_simulations)

                diff = set(reference_ids) - found_reference_simulations - \
                    self.scheduled_reference_simulations_ids

                if len(diff) > 0:
                    # There are simulations that don't have a reference simulation calculated.
                    ref_forecast = copy.deepcopy(yield_forecast)
                    ref_forecast.name = 'Reference simulations for forecast %s' % forecast.name
                    ref_forecast.configuration.weather_series = 'historic'
                    ref_forecast.forecast_date = None

                    rm_locs = []
                    for loc_key, loc_simulations in ref_forecast.simulations.items():
                        # Filter reference simulations.
                        loc_simulations[:] = [x for x in loc_simulations if x.reference_id in diff]
                        if len(loc_simulations) == 0:
                            rm_locs.append(loc_key)

                    for loc_key in rm_locs:
                        del ref_forecast.locations[loc_key]
                        del ref_forecast.simulations[loc_key]

                    self.schedule_forecast(ref_forecast, priority=RUN_REFERENCE_FORECAST)
                    self.scheduled_reference_simulations_ids |= diff
                    logging.info('Scheduled reference simulations for forecast: %s' % forecast.name)
                else:
                    # Remove this forecast's reference simulation id's.
                    self.scheduled_reference_simulations_ids -= set(reference_ids)

            progress_monitor.update_progress(new_value=3)

            forecast.paths.run_script_path = CampaignWriter.write_campaign(
                forecast, output_dir=forecast.paths.rundir)
            forecast.simulation_count = len(simulations_ids)

            progress_monitor.update_progress(new_value=4)

            # Insert the simulation id's into the forecast document.
            if forecast_id:
                db.forecasts.update_one(
                    {"_id": forecast_id},
                    {"$push": {"simulations": {"$each": simulations_ids}}})

            # Run the simulations.
            weather_series_monitor = ProgressMonitor()
            progress_monitor.add_subjob(weather_series_monitor, job_name='Run pSIMS')
            psims_exit_code = self.psims_runner.run(
                forecast, progress_monitor=weather_series_monitor, verbose=True)

            # Check the results.
            if psims_exit_code == 0:
                inserted_simulations = db[forecast.configuration['simulation_collection']].find(
                    {
                        '_id': {'$in': simulations_ids},
                        # Find simulations that have a results field (either cycle or daily).
                        # This property is created by the pSIMS Mongo hook, so if a simulation doesn't
                        # have this field it means that the execution inside pSIMS failed.
                        '$or': [
                            {'daily_results': {'$exists': True}},
                            {'cycle_results': {'$exists': True}}
                        ]
                    },
                    projection=['daily_results', 'cycle_results', 'name'])

                if len(simulations_ids) != inserted_simulations.count():
                    raise RuntimeError('Mismatch between simulations id\'s length and finished '
                                       'simulations count (%s != %s)' %
                                       (len(simulations_ids), inserted_simulations.count()))

                if 'HWAM' in forecast.results.cycle:
                    # Check that there are no -99 values in the crop yield.
                    for sim in inserted_simulations:
                        if 'cycle_results' not in sim:
                            continue
                        for scen_idx, scenario in enumerate(sim['cycle_results']['HWAM']['scenarios']):
                            if not isinstance(scenario['value'], (int, float)):
                                # Years are nested inside the scenario.
                                for year_index, v in enumerate(scenario['value']):
                                    if v['value'] < 0:
                                        raise RuntimeError(
                                            'Found a negative value for HWAM inside a simulation '
                                            '(%s, id = %s, scenario index = %d, year index = %d).' %
                                            (sim['name'], sim['_id'], scen_idx, year_index))
                            elif scenario['value'] < 0:
                                raise RuntimeError(
                                    'Found a negative value for HWAM inside a simulation (%s, '
                                    'id = %s, scenario index = %d).' %
                                    (sim['name'], sim['_id'], scen_idx))

            logging.getLogger().info('Finished running forecast "%s" (time=%s).\n' %
                                     (forecast.name, datetime.now() - run_start_time))
        except Exception:
            logging.getLogger().error("Failed to run forecast '%s'. Reason: %s" %
                                      (forecast.name, log_format_exception()))
            exception_raised = True
        finally:
            if exception_raised or psims_exit_code != 0:
                logging.info('Rolling back DB data for forecast "%s".' % forecast_full_name)

                if db:
                    if simulations_ids and len(simulations_ids) > 0:
                        db[forecast.configuration['simulation_collection']].delete_many(
                            {"_id": {"$in": simulations_ids}})
                    if forecast_id:
                        db.forecasts.delete_one({"_id": forecast_id})
                return -1

        if not psims_exit_code or psims_exit_code == 0:
            # Clean the rundir.
            if os.path.exists(forecast.paths.rundir):
                shutil.rmtree(forecast.paths.rundir)

        if psims_exit_code == 0:
            # Clean the pSIMS run folder.
            rundir_regex = re.compile(r'.+/run(\d){3}$')
            files_filter = lambda file_name: rundir_regex.match(file_name) is not None
            psims_run_dirs = sorted(listdir_fullpath(forecast.paths.psims, filter=files_filter),
                                    reverse=True)

            if len(psims_run_dirs) > 0:
                # Remove the last runNNN directory (the one this execution created).
                shutil.rmtree(psims_run_dirs[0])

        return psims_exit_code
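# Hedged sketch of the HWAM sanity check above: a scenario 'value' may be a
# scalar or a list of per-year {'value': ...} dicts, and any negative yield
# (e.g. the -99 missing-data marker) should be flagged. The helper name and
# the sample values are hypothetical.
def _has_negative_yield(scenario_value):
    if isinstance(scenario_value, (int, float)):
        return scenario_value < 0
    # Nested years inside the scenario.
    return any(year['value'] < 0 for year in scenario_value)

assert _has_negative_yield(-99)
assert not _has_negative_yield([{'value': 3200.0}, {'value': 2800.0}])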
def test_initFromDict(self):
    d = {"prop1": 1, "prop2": 2}
    d = DotDict(d)
    self.assertEqual(d.prop1, d.get('prop1'))
    self.assertEqual(d.prop1, d['prop1'])
def test_joinDicts(self):
    d1 = DotDict({"prop1": 1})
    d2 = DotDict({"prop2": 2})
    d1.update(d2)
    self.assertEqual(d1.prop2, 2)
def test_nestedDict(self):
    d = {"prop1": {"prop2": {"prop3": 3}}}
    d = DotDict(d)
    self.assertEqual(d.prop1.prop2.prop3, 3)
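# Minimal sketch of the DotDict contract these tests exercise: attribute access
# and item access are interchangeable, and nested dicts support chained dot
# access. This is an illustrative reimplementation, not the project's class.
class _DotDictSketch(dict):
    def __getattr__(self, name):
        try:
            value = self[name]
        except KeyError:
            raise AttributeError(name)
        # Wrap nested plain dicts so chained dot access keeps working.
        return _DotDictSketch(value) if isinstance(value, dict) else value

    def __setattr__(self, name, value):
        self[name] = value

_d = _DotDictSketch({'prop1': {'prop2': {'prop3': 3}}})
assert _d.prop1.prop2.prop3 == 3
_d.prop4 = 4
assert _d['prop4'] == 4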