def add_job(self, func, name=None, *args, **kwargs):
    """
    Adds a job to the scheduler.

    The func param can be either a function/method or a BaseJob instance. If it's a function,
    it'll be wrapped in a MonitoredFunctionJob in order to be traceable.

    :param func: A function or method to be called, or a BaseJob instance.
    :param name: An optional job name (defaults to the job's own name).
    :return: An APScheduler Job instance.
    """
    if not isinstance(func, BaseJob):
        job = MonitoredFunctionJob(function=func, progress_monitor=ProgressMonitor())
    else:
        job = func

    # Add the wrapped job to the real scheduler.
    nm = name or job.name
    job_id = xxh64(nm).hexdigest()
    j = BackgroundScheduler.add_job(self, func=job.start, id=job_id, name=nm,
                                    replace_existing=True, *args, **kwargs)
    job.id = j.id
    job.name = j.name

    for l in self._progress_listeners:
        job.progress_monitor.add_listener(l['listener'], mask=l['mask'])

    return j
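# A minimal, self-contained sketch (not part of the original source) of how the job id used by
# add_job above is derived: the job name is hashed with xxh64, so adding a job with the same name
# again produces the same id and, together with replace_existing=True, replaces the existing
# scheduler entry instead of duplicating it. The job name below is an illustrative placeholder.
from xxhash import xxh64 as _xxh64_example

_example_job_name = 'Synchronize forecasts'
_example_job_id = _xxh64_example(_example_job_name).hexdigest()
# Same name -> same id -> the scheduler entry is replaced rather than duplicated.
assert _example_job_id == _xxh64_example(_example_job_name).hexdigest()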
def __init__(self, system_config, parent_task_monitor=None, max_events=10000):
    super(RunImputation, self).__init__(
        name='Impute missing data and calculate radiation',
        progress_monitor=ProgressMonitor())
    self.max_events = max_events
    self.station = re.compile(r'Station: (\d+)')
    self.system_config = system_config

    if not parent_task_monitor:
        parent_task_monitor = NullMonitor()
    self.parent_monitor = parent_task_monitor
def __init__(self, system_config):
    super(SoilsUpdater, self).__init__(progress_monitor=ProgressMonitor())
    self.system_config = system_config
    self.db = system_config.database['yield_db']
def __init__(self, system_config):
    super(YieldDatabaseSync, self).__init__(progress_monitor=ProgressMonitor(end_value=4))
    self.system_config = system_config
def run(self):
    self.progress_monitor.update_progress(job_status=JOB_STATUS_WAITING)

    # Acquire a read lock (parallel job).
    with self.system_config.jobs_lock.parallel_job():
        self.progress_monitor.update_progress(job_status=JOB_STATUS_RUNNING)

        if 'yield_sync_db' in self.system_config.database:
            source_db = self.system_config.database['yield_db']
            target_db = self.system_config.database['yield_sync_db']

            new_forecasts = self.__find_collection_diff__(collection_name='forecasts',
                                                          source_db=source_db,
                                                          target_db=target_db)

            if new_forecasts.count() > 0:
                forecasts_insert_monitor = ProgressMonitor(end_value=new_forecasts.count())
                self.progress_monitor.add_subjob(forecasts_insert_monitor,
                                                 job_name='Synchronize forecasts')
                inserted_forecasts_count = 0

                # Sync new forecasts.
                for f in new_forecasts:
                    simulations_ids = f['simulations']

                    bulk_op = target_db.simulations.initialize_unordered_bulk_op()

                    # Fetch this forecast's simulations.
                    for simulation in source_db.simulations.find({'_id': {'$in': simulations_ids}}):
                        bulk_op.insert(simulation)

                    try:
                        bulk_op.execute()
                    except BulkWriteError as bwe:
                        # Check if every error that was raised was a duplicate key error (11000).
                        for err in bwe.details['writeErrors']:
                            if err['code'] != 11000:
                                raise RuntimeError('Non-recoverable error found while trying to sync yield '
                                                   'databases. Details: %s' % bwe.details)

                    target_db.forecasts.insert(f)
                    inserted_forecasts_count += 1
                    forecasts_insert_monitor.update_progress(inserted_forecasts_count)

            # Notify we finished syncing forecasts (the first part of the job).
            self.progress_monitor.update_progress(new_value=1)

            # Sync new reference simulations.
            self.__insert_missing_documents__(collection_name='reference_simulations',
                                              source_db=source_db,
                                              target_db=target_db)
            self.progress_monitor.update_progress(new_value=2)

            # Sync new locations.
            self.__insert_missing_documents__(collection_name='locations',
                                              source_db=source_db,
                                              target_db=target_db)
            self.progress_monitor.update_progress(new_value=3)

            # Sync new reference rainfalls.
            self.__insert_missing_documents__(collection_name='reference_rainfall',
                                              id_field='omm_id',
                                              source_db=source_db,
                                              target_db=target_db)
            self.progress_monitor.update_progress(new_value=4)

            # Sync new soils.
            self.__insert_missing_documents__(collection_name='soils',
                                              source_db=source_db,
                                              target_db=target_db)
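# A minimal sketch (not part of the original source) of the duplicate-key tolerance applied in the
# except BulkWriteError branch above: the sync only re-raises when some write error is NOT a
# duplicate-key error (code 11000). The helper name is hypothetical; the details layout follows
# pymongo's BulkWriteError.details ({'writeErrors': [{'code': ..., ...}, ...]}).
def _only_duplicate_key_errors(bulk_write_details):
    """Return True if every write error in the details dict is a duplicate key error (11000)."""
    return all(err['code'] == 11000 for err in bulk_write_details.get('writeErrors', []))


# Usage sketch with fabricated details dicts: already-synced documents are tolerated, anything
# else would make the sync abort.
assert _only_duplicate_key_errors({'writeErrors': [{'code': 11000}, {'code': 11000}]})
assert not _only_duplicate_key_errors({'writeErrors': [{'code': 11000}, {'code': 121}]})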
def update_weather_db(self, progress_monitor=None):
    if not progress_monitor:
        progress_monitor = NullMonitor()

    if len(self.weather_stations_ids) == 0:
        return

    # Notify observers we're going to wait for a lock acquisition.
    progress_monitor.job_started(initial_status=JOB_STATUS_WAITING)

    with self.system_config.jobs_lock.blocking_job(priority=UPDATE_DB_DATA):
        # Lock acquired, notify observers.
        progress_monitor.update_progress(job_status=JOB_STATUS_RUNNING)

        # Check weather API configuration.
        if 'weather_update_api' not in self.system_config:
            raise RuntimeError('Weather update API not configured.')

        api_config = self.system_config['weather_update_api']

        if not api_config['url']:
            raise RuntimeError('Missing URL in weather update API configuration.')

        if 'user' not in api_config:
            raise RuntimeError('Missing username in weather update API configuration.')

        if 'password' not in api_config:
            api_config['password'] = DatabaseUtils.__password_lookup__(
                api_config['user'], config_path=self.system_config.config_path)

        if not api_config['password']:
            raise RuntimeError('Missing password in weather update API configuration and it couldn\'t be found '
                               'in the password lookup folder.')

        req_params = {
            'url': api_config['url'],
            'user': api_config['user'],
            'password': api_config['password'],
            'table': 'estacion_registro_diario'
        }

        stations_max_data_date = self.find_max_data_dates(self.weather_stations_ids)

        progress_monitor.end_value = len(stations_max_data_date)

        request_groups = group_by(stations_max_data_date, lambda x: x[1])

        stations_updated = set()
        n_stations_updated = 0
        cursor = None

        try:
            wth_db = self.system_config.database['weather_db']
            cursor = wth_db.cursor()
            cursor.execute('BEGIN TRANSACTION')

            for min_date, omm_ids in request_groups.items():
                stations_ids = {omm_id[0] for omm_id in omm_ids}
                n_stations_updated += len(stations_ids)

                str_ids = {str(omm_id) for omm_id in stations_ids}
                req_params['omm_ids'] = ','.join(str_ids)
                req_params['min_date'] = min_date + timedelta(days=1)

                response = requests.get('%(url)s?login=%(user)s&password=%(password)s'
                                        '&tabla=%(table)s&fecha_desde=%(min_date)s&omm_id=%(omm_ids)s' % req_params)

                if not response.ok:
                    raise RuntimeError('API request failed (status: %s). Reason: %s.' %
                                       (response.status_code, response.reason))

                if 'text/csv' not in response.headers['content-type']:
                    raise RuntimeError('Wrong response type in update API: %s.' %
                                       response.headers['content-type'])

                update_data = response.content.decode('utf8').strip().split('\n')

                progress_monitor.update_progress(new_value=n_stations_updated)

                # Uncomment to force the imputation to run.
                # stations_updated |= self.weather_stations_ids

                if len(update_data) < 2:
                    continue

                header = update_data[0].split('\t')
                update_data = io.StringIO('\n'.join(update_data[1:]))

                # Insert new data into the database.
                cursor.copy_from(update_data, 'estacion_registro_diario')

                stations_updated |= stations_ids  # Extend set.

            if len(stations_updated) > 0:
                cursor.execute("COMMIT")

                impute_job = RunImputation(system_config=self.system_config,
                                           parent_task_monitor=progress_monitor)

                stations_to_impute = stations_updated.intersection(self.weather_stations_ids)

                if len(stations_to_impute) > 0:
                    ret_val = impute_job.start(weather_stations=stations_to_impute)
                else:
                    ret_val = 0

                pm = ProgressMonitor()
                progress_monitor.add_subjob(pm, 'Refresh materialized view')

                # Refresh materialized view.
                self.refresh_view(pm)

                if ret_val == 0:
                    # Update max dates again.
                    self.update_max_dates(run_blocking=False)
                    logging.info('Updated weather data for station(s): %s.' % stations_updated)
                    return 0
                else:
                    logging.error('Imputation ended with a non-zero exit status (%d).' % ret_val)
                    return 2
            else:
                # No new data, end transaction to avoid holding a lock on tables.
                cursor.execute("ROLLBACK")
                logging.info('No new weather data found.')
        except Exception as ex:
            logging.error('Failed to update weather data. Reason: %s' % log_format_exception(ex))
            if cursor:
                # If there is an open cursor, rollback the transaction.
                cursor.execute("ROLLBACK")
            return 2
        finally:
            if cursor:
                cursor.close()

        return 1
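# A standalone sketch (not part of the original source) of the batching step above: stations are
# grouped by their latest stored data date so a single API request can fetch every station that
# needs data from the same start date onwards. It assumes find_max_data_dates yields
# (omm_id, max_date) tuples; the project's group_by helper is replaced here by
# collections.defaultdict, and the station ids and dates are illustrative only.
from collections import defaultdict
from datetime import date, timedelta

def _group_stations_by_max_date(stations_max_data_date):
    groups = defaultdict(set)
    for omm_id, max_date in stations_max_data_date:
        groups[max_date].add(omm_id)
    return groups

_example_groups = _group_stations_by_max_date([(87576, date(2020, 1, 10)),
                                               (87585, date(2020, 1, 10)),
                                               (87544, date(2020, 1, 12))])
assert _example_groups[date(2020, 1, 10)] == {87576, 87585}  # one request covers both stations
_request_start_date = date(2020, 1, 10) + timedelta(days=1)  # data is requested from the day after the last stored date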
def run_forecast(self, yield_forecast, priority=RUN_FORECAST, progress_monitor=None):
    forecast_full_name = '%s (%s)' % (yield_forecast.name, yield_forecast.forecast_date)

    logging.getLogger().info('Running forecast "%s".' % forecast_full_name)

    psims_exit_code = None
    db = None
    forecast_id = None
    simulations_ids = None
    exception_raised = False

    if not progress_monitor:
        progress_monitor = NullMonitor()

    progress_monitor.end_value = 5
    progress_monitor.job_started()
    progress_monitor.update_progress(job_status=JOB_STATUS_WAITING)

    with self.system_config.jobs_lock.blocking_job(priority=priority):
        # Lock acquired.
        progress_monitor.update_progress(job_status=JOB_STATUS_RUNNING)

        forecast = copy.deepcopy(yield_forecast)

        try:
            run_start_time = datetime.now()

            # Get MongoDB connection.
            db = self.system_config.database['yield_db']

            # Add database connection information to the forecast config to use it when writing
            # the pSIMS params file.
            forecast.configuration.database = DotDict({
                'name': db.name,
                'host': db.client.HOST,
                'port': db.client.PORT
            })

            forecast.configuration.weather_maker_class = \
                ForecastLoader.weather_series_makers[forecast.configuration.weather_series]

            # Create an instance of the weather series maker.
            wth_series_maker = forecast.configuration.weather_maker_class(
                self.system_config, forecast.configuration.max_parallelism)

            # The simulations collection can be defined by the user in the YAML file.
            if 'simulation_collection' not in forecast.configuration:
                # If it's not defined, base the decision of which one to use on the type of
                # weather series the forecast will use.
                forecast.configuration['simulation_collection'] = 'simulations'
                if forecast.configuration.weather_series == 'historic':
                    forecast.configuration['simulation_collection'] = 'reference_simulations'
                if forecast.configuration.weather_series == 'netcdf':
                    forecast.configuration['simulation_collection'] = 'netcdf_simulations'

            if forecast.configuration['simulation_collection'] not in db.collection_names():
                raise RuntimeError('The specified collection (%s) does not exist in the results database.' %
                                   forecast.configuration['simulation_collection'])

            folder_name = "%s" % (datetime.now().isoformat())
            folder_name = folder_name.replace('"', '').replace('\'', '').replace(' ', '_')
            forecast.folder_name = folder_name

            # Add folder name to rundir and create it.
            forecast.paths.rundir = os.path.abspath(os.path.join(forecast.paths.rundir, folder_name))
            create_folder_with_permissions(forecast.paths.rundir)

            # Create a folder for the weather grid inside that rundir.
            forecast.paths.weather_grid_path = os.path.join(forecast.paths.rundir, 'wth')
            create_folder_with_permissions(forecast.paths.weather_grid_path)

            # Create a folder for the soil grid inside that rundir.
            forecast.paths.soil_grid_path = os.path.join(forecast.paths.rundir, 'soils')
            create_folder_with_permissions(forecast.paths.soil_grid_path)

            # Create the folder where we'll read the CSV files created by the database.
            forecast.paths.wth_csv_read = os.path.join(forecast.paths.wth_csv_read, folder_name)
            forecast.paths.wth_csv_export = os.path.join(forecast.paths.wth_csv_export, folder_name)
            create_folder_with_permissions(forecast.paths.wth_csv_read)

            active_threads = dict()

            forecast.weather_stations = {}
            forecast.rainfall = {}

            stations_not_updated = set()

            if forecast.forecast_date is None:
                run_date = datetime.now().date()
            else:
                run_date = datetime.strptime(forecast.forecast_date, '%Y-%m-%d').date()

            for loc_key, location in forecast['locations'].items():
                omm_id = location['weather_station']

                # Upsert location.
                db.locations.update_one(
                    {'_id': location.id},
                    {
                        # '$set': {
                        #     "name": location.name,
                        #     "coord_x": location.coord_x,
                        #     "coord_y": location.coord_y,
                        #     "weather_station": location.weather_station
                        # }
                        '$set': location.persistent_view()
                    },
                    upsert=True)

                # If this forecast is creating weather files from the weather database, check that
                # the station associated with each location is currently updated.
                if issubclass(wth_series_maker.__class__, DatabaseWeatherSeries):
                    if omm_id not in self.weather_updater.wth_max_date:
                        # Since the system only updates weather info for the stations that are
                        # currently being used, it may happen that the requested station is not in
                        # the weather updated max dates dict.
                        self.weather_updater.add_weather_station_id(omm_id)
                        stations_not_updated.add(omm_id)
                        continue
                    elif not isinstance(wth_series_maker, HistoricalSeriesMaker) and \
                            self.weather_updater.wth_max_date[omm_id] < run_date:
                        # If the forecast date is greater than the max date of climate data for
                        # this station, we add it to the not updated set.
                        stations_not_updated.add(omm_id)
                        continue

                if omm_id not in active_threads:
                    # Weather station data updated, forecast can be run.
                    active_threads[omm_id] = threading.Thread(
                        target=wth_series_maker.create_series,
                        name='create_series for omm_id = %s' % omm_id,
                        args=(location, forecast))
                else:
                    # Weather station already has an associated thread that will create the
                    # weather series.
                    continue

            if len(stations_not_updated) > 0:
                # Forecast can't continue, must be rescheduled.
                logging.warning("Couldn't run forecast \"%s\" because the following weather stations don't have "
                                "updated data: %s." % (forecast_full_name, list(stations_not_updated)))
                self.reschedule_forecast(forecast)
                return 0

            progress_monitor.update_progress(new_value=1)

            weather_series_monitor = ProgressMonitor(end_value=len(active_threads))
            progress_monitor.add_subjob(weather_series_monitor,
                                        job_name='Create weather series (%s)' %
                                                 forecast.configuration.weather_maker_class.__name__)
            joined_threads_count = 0

            # Start all weather maker threads.
            for t in list(active_threads.values()):
                t.start()

            # Wait for the weather grid to be populated.
            for t in list(active_threads.values()):
                t.join()
                joined_threads_count += 1
                weather_series_monitor.update_progress(joined_threads_count)

            weather_series_monitor.job_ended()
            progress_monitor.update_progress(new_value=2)

            # If the folder is empty, delete it.
            if os.path.exists(forecast.paths.wth_csv_read) and len(os.listdir(forecast.paths.wth_csv_read)) == 0:
                # These folders are used only by classes in core.modules.simulations_manager.weather.csv.
                # The rest of the weather series makers use in-memory series creation.
                shutil.rmtree(forecast.paths.wth_csv_read)

            forecast_persistent_view = forecast.persistent_view()
            is_reference_forecast = True
            if forecast_persistent_view:
                is_reference_forecast = False
                forecast_id = db.forecasts.insert_one(forecast_persistent_view).inserted_id
                if not forecast_id:
                    raise RuntimeError('Failed to insert forecast with id: %s' %
                                       forecast_persistent_view['_id'])

            simulations_ids = []
            reference_ids = []

            # Flatten simulations and update location info (with id's and computed weather stations).
            for loc_key, loc_simulations in forecast.simulations.items():
                for sim in loc_simulations:
                    sim.location = forecast.locations[loc_key]
                    sim.weather_station = forecast.weather_stations[sim.location.weather_station]

                    # If a simulation has an associated forecast, fill the associated fields.
                    if forecast_id:
                        sim.forecast_id = forecast_id
                        sim.forecast_date = forecast.forecast_date

                    reference_ids.append(sim.reference_id)

                    sim_id = db[forecast.configuration['simulation_collection']].insert_one(
                        sim.persistent_view()).inserted_id
                    sim['_id'] = sim_id
                    simulations_ids.append(sim_id)

            if not is_reference_forecast:
                # Find which simulations have a reference simulation associated.
                found_reference_simulations = db.reference_simulations.find(
                    {'_id': {'$in': reference_ids}},
                    projection=['_id'])
                found_reference_simulations = set([s['_id'] for s in found_reference_simulations])

                diff = set(reference_ids) - found_reference_simulations - \
                    self.scheduled_reference_simulations_ids

                if len(diff) > 0:
                    # There are simulations that don't have a reference simulation calculated.
                    ref_forecast = copy.deepcopy(yield_forecast)
                    ref_forecast.name = 'Reference simulations for forecast %s' % forecast.name
                    ref_forecast.configuration.weather_series = 'historic'
                    ref_forecast.forecast_date = None

                    rm_locs = []

                    for loc_key, loc_simulations in ref_forecast.simulations.items():
                        # Filter reference simulations.
                        loc_simulations[:] = [x for x in loc_simulations if x.reference_id in diff]

                        if len(loc_simulations) == 0:
                            rm_locs.append(loc_key)

                    for loc_key in rm_locs:
                        del ref_forecast.locations[loc_key]
                        del ref_forecast.simulations[loc_key]

                    self.schedule_forecast(ref_forecast, priority=RUN_REFERENCE_FORECAST)

                    self.scheduled_reference_simulations_ids |= diff
                    logging.info('Scheduled reference simulations for forecast: %s' % forecast.name)
            else:
                # Remove this reference forecast's id's.
                self.scheduled_reference_simulations_ids -= set(reference_ids)

            progress_monitor.update_progress(new_value=3)

            forecast.paths.run_script_path = CampaignWriter.write_campaign(forecast,
                                                                           output_dir=forecast.paths.rundir)
            forecast.simulation_count = len(simulations_ids)

            progress_monitor.update_progress(new_value=4)

            # Insert the simulation id's into the forecast document.
            if forecast_id:
                db.forecasts.update_one({"_id": forecast_id},
                                        {"$push": {"simulations": {"$each": simulations_ids}}})

            # Run the simulations.
            weather_series_monitor = ProgressMonitor()
            progress_monitor.add_subjob(weather_series_monitor, job_name='Run pSIMS')
            psims_exit_code = self.psims_runner.run(forecast,
                                                    progress_monitor=weather_series_monitor,
                                                    verbose=True)

            # Check results.
            if psims_exit_code == 0:
                inserted_simulations = db[forecast.configuration['simulation_collection']].find(
                    {
                        '_id': {'$in': simulations_ids},
                        # Find simulations that have a results field (either cycle or daily).
                        # This property is created by the pSIMS Mongo hook, so if a simulation
                        # doesn't have this field it means that the execution inside pSIMS failed.
                        '$or': [
                            {'daily_results': {'$exists': True}},
                            {'cycle_results': {'$exists': True}}
                        ]
                    },
                    projection=['daily_results', 'cycle_results', 'name'])

                if len(simulations_ids) != inserted_simulations.count():
                    raise RuntimeError('Mismatch between simulations id\'s length and finished simulations '
                                       'count (%s != %s)' % (len(simulations_ids),
                                                             inserted_simulations.count()))

                if 'HWAM' in forecast.results.cycle:
                    # Check that there are no -99 values in the crop yield.
                    for sim in inserted_simulations:
                        if 'cycle_results' not in sim:
                            continue

                        for scen_idx, scenario in enumerate(sim['cycle_results']['HWAM']['scenarios']):
                            if not (isinstance(scenario['value'], int) or isinstance(scenario['value'], float)):
                                # Nested years inside the scenario.
                                for year_index, v in enumerate(scenario['value']):
                                    if v['value'] < 0:
                                        raise RuntimeError('Found a negative value for HWAM inside a simulation '
                                                           '(%s, id = %s, scenario index = %d, year index = %d).' %
                                                           (sim['name'], sim['_id'], scen_idx, year_index))
                            elif scenario['value'] < 0:
                                raise RuntimeError('Found a negative value for HWAM inside a simulation (%s, '
                                                   'id = %s, scenario index = %d).' %
                                                   (sim['name'], sim['_id'], scen_idx))

            logging.getLogger().info('Finished running forecast "%s" (time=%s).\n' %
                                     (forecast.name, datetime.now() - run_start_time))
        except:
            logging.getLogger().error("Failed to run forecast '%s'. Reason: %s" %
                                      (forecast.name, log_format_exception()))
            exception_raised = True
        finally:
            if exception_raised or psims_exit_code != 0:
                logging.info('Rolling back DB data for forecast "%s".' % forecast_full_name)

                if db:
                    if simulations_ids and len(simulations_ids) > 0:
                        db[forecast.configuration['simulation_collection']].delete_many(
                            {"_id": {"$in": simulations_ids}})
                    if forecast_id:
                        db.forecasts.delete_one({"_id": forecast_id})
                return -1

        if not psims_exit_code or psims_exit_code == 0:
            # Clean the rundir.
            if os.path.exists(forecast.paths.rundir):
                shutil.rmtree(forecast.paths.rundir)

        if psims_exit_code == 0:
            # Clean pSIMS run folder.
            rundir_regex = re.compile(r'.+/run(\d){3}$')
            files_filter = lambda file_name: rundir_regex.match(file_name) is not None

            psims_run_dirs = sorted(listdir_fullpath(forecast.paths.psims, filter=files_filter),
                                    reverse=True)

            if len(psims_run_dirs) > 0:
                # Remove the last runNNN directory (the one this execution created).
                shutil.rmtree(psims_run_dirs[0])

        return psims_exit_code
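# A standalone sketch (not part of the original source) of the HWAM sanity check run_forecast
# performs after pSIMS finishes: every scenario value (or, for multi-year scenarios, every nested
# yearly value) must be non-negative, so -99 "missing data" yields are flagged. It assumes the
# cycle_results layout used in that check ({'HWAM': {'scenarios': [{'value': <number or
# [{'value': ...}, ...]>}]}}); the function name and sample document are illustrative only.
def _hwam_has_negative_values(cycle_results):
    for scenario in cycle_results['HWAM']['scenarios']:
        value = scenario['value']
        if isinstance(value, (int, float)):
            if value < 0:
                return True
        else:
            # Nested years inside the scenario.
            if any(year['value'] < 0 for year in value):
                return True
    return False

# Illustrative document: the second scenario carries a -99 yield in one year and should be flagged.
assert _hwam_has_negative_values({'HWAM': {'scenarios': [{'value': 3520.0},
                                                         {'value': [{'value': 4100.0},
                                                                    {'value': -99.0}]}]}})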
def run(self):
    logging.info('Running yield database synchronization.')

    self.progress_monitor.update_progress(job_status=JOB_STATUS_WAITING)

    # Acquire a read lock (parallel job).
    with self.system_config.jobs_lock.parallel_job():
        self.progress_monitor.update_progress(job_status=JOB_STATUS_RUNNING)

        if 'yield_sync_db' in self.system_config.database:
            source_db = self.system_config.database['yield_db']
            target_db = self.system_config.database['yield_sync_db']

            new_forecasts = self.__find_collection_diff__(collection_name='forecasts',
                                                          source_db=source_db,
                                                          target_db=target_db)
            new_forecasts_count = self.__count_collection_diff__(collection_name='forecasts',
                                                                 source_db=source_db,
                                                                 target_db=target_db)

            if new_forecasts_count > 0:
                forecasts_insert_monitor = ProgressMonitor(end_value=new_forecasts_count)
                self.progress_monitor.add_subjob(forecasts_insert_monitor,
                                                 job_name='Synchronize forecasts')
                inserted_forecasts_count = 0

                # Sync new forecasts.
                for f in new_forecasts:
                    simulations_ids = f['simulations']

                    bulk_op = target_db.simulations.initialize_unordered_bulk_op()

                    # Fetch this forecast's simulations.
                    for simulation in source_db.simulations.find({'_id': {'$in': simulations_ids}}):
                        bulk_op.insert(simulation)

                    try:
                        bulk_op.execute()
                    except BulkWriteError as bwe:
                        # Check if every error that was raised was a duplicate key error (11000).
                        for err in bwe.details['writeErrors']:
                            if err['code'] != 11000:
                                raise RuntimeError('Non-recoverable error found while trying to sync yield '
                                                   'databases. Details: %s' % bwe.details)

                    target_db.forecasts.insert(f)
                    inserted_forecasts_count += 1
                    forecasts_insert_monitor.update_progress(inserted_forecasts_count)

            # Notify we finished syncing forecasts (the first part of the job).
            self.progress_monitor.update_progress(new_value=1)

            # Sync new reference simulations.
            self.__insert_missing_documents__(collection_name='reference_simulations',
                                              source_db=source_db,
                                              target_db=target_db)
            self.progress_monitor.update_progress(new_value=2)

            # Sync new locations.
            self.__insert_missing_documents__(collection_name='locations',
                                              source_db=source_db,
                                              target_db=target_db)
            self.progress_monitor.update_progress(new_value=3)

            # Sync new reference rainfalls.
            self.__insert_missing_documents__(collection_name='reference_rainfall',
                                              id_field='omm_id',
                                              source_db=source_db,
                                              target_db=target_db)
            self.progress_monitor.update_progress(new_value=4)

            # Sync new soils.
            self.__insert_missing_documents__(collection_name='soils',
                                              source_db=source_db,
                                              target_db=target_db)

            logging.info('Yield database synchronization finished.')

            logging.info('Restarting shiny-server in front-end.')

            paramiko_logger = logging.getLogger("paramiko.transport")
            paramiko_logger.setLevel(logging.ERROR)
            invoke_logger = logging.getLogger("invoke")
            invoke_logger.setLevel(logging.ERROR)
            fabric_logger = logging.getLogger("fabric")
            fabric_logger.setLevel(logging.ERROR)

            # ssh-keygen -f "/home/${USER}/.ssh/known_hosts" -R "${frontend_ip}"
            # ssh-copy-id root@${frontend_ip}
            try:
                with Connection(host=self.system_config.frontend_address, user='******') as conn:
                    result = conn.run('service shiny-server restart', hide=True)
                    if result.ok:
                        logging.info('Shiny-server restarted successfully.')
                    else:
                        logging.warning('Shiny-server restart failed: return code {}, error {}'.format(
                            result.exited, result.stderr))
            except NoValidConnectionsError as ex:
                logging.warning('Shiny-server restart failed, connection error: {}'.format(ex.strerror))
            except BadAuthenticationType as ex:
                logging.warning('Shiny-server restart failed, bad authentication type: {}'.format(ex))
            except AuthenticationException as ex:
                logging.warning('Shiny-server restart failed, authentication error: {}'.format(ex))
            except UnexpectedExit as ex:
                logging.warning('Shiny-server restart failed, unexpected exit error: {}'.format(
                    ex.result.stderr.rstrip()))
            except Exception as ex:
                logging.warning('Shiny-server restart failed, error: {}'.format(ex))
                logging.warning('Shiny-server restart failed, do it manually!!')