Ejemplo n.º 1
0
def load_soils():
    """Scan the soils directory tree for JSON files and register them in soils_dict.

    Each soil is keyed by its filename without extension. When two files map
    to the same key, the first one found wins and a warning is logged for the
    duplicate.
    """
    def _has_json_ext(path):
        # Compare the extension case-insensitively so '.JSON' also matches.
        return os.path.splitext(path)[1].lower() == '.json'

    for soil_path in listdir_fullpath(soils_path, recursive=True, onlyFiles=True, filter=_has_json_ext):
        soil_name = filename_without_ext(soil_path)

        if soil_name not in soils_dict:
            soils_dict[soil_name] = soil_path
        else:
            logging.warning('Duplicated soil name "%s". Found at two different paths: "%s" and "%s".' % (soil_name,
                                                                                                         soils_dict[soil_name],
                                                                                                         soil_path))
Ejemplo n.º 2
0
    def create_series(self, location, forecast, extract_rainfall=False):
        """Create the weather series for a location inside the forecast's weather grid.

        Exports the location's weather data to per-scenario CSV files, reads the
        weather station information file, joins the scenario CSVs into the grid
        row/column folder for the station, and records the station metadata
        (weather path, scenario count and names) in ``forecast.weather_stations``.
        The whole operation runs under ``self.concurrency_lock``.

        :param location: location whose weather series should be created (its
            string form names the working folder and the station file).
        :param forecast: forecast whose paths are used and whose
            ``weather_stations`` (and optionally ``rainfall``) dicts are updated.
        :param extract_rainfall: if True, rainfall data returned by the CSV join
            is stored in ``forecast.rainfall`` keyed by str(location).
        :raises RuntimeError: if the station information file is missing or the
            joined weather file(s) couldn't be created.
        """
        with self.concurrency_lock:
            output_path = os.path.join(forecast.paths.wth_csv_read,
                                       str(location))
            create_folder_with_permissions(output_path)

            # Create the weather series in CSV format.
            self.create_from_db(location, forecast)

            # List the directory where the CSV files with the weather information are.
            # NOTE(review): endswith('csv') also matches names like 'foocsv'
            # without a dot — confirm whether '.csv' was intended.
            dir_list = sorted(
                listdir_fullpath(output_path,
                                 onlyFiles=True,
                                 filter=(lambda x: x.endswith('csv'))))

            # Build the main file path (the file with the weather station's information).
            main_file = os.path.join(output_path,
                                     ('_' + str(location) + '.csv'))

            if main_file not in dir_list:
                raise RuntimeError(
                    'Missing station information for a weather serie (omm_id = %s).'
                    % location)

            # Use a context manager so the station file handle is always closed
            # (the previous code leaked the handle returned by open()).
            with open(main_file) as station_file:
                station_info = self.read_station_info(station_file)
            forecast.weather_stations[location] = station_info

            weather_grid = forecast.paths['weather_grid_path']

            # Grid folders are named '_RRR' / '_CCC' (zero-padded row/column).
            grid_row_folder = os.path.join(weather_grid,
                                           '_%03d' % station_info['grid_row'])
            if not os.path.exists(grid_row_folder):
                create_folder_with_permissions(grid_row_folder)

            grid_column_folder = os.path.join(
                grid_row_folder, '_%03d' % station_info['grid_column'])
            if not os.path.exists(grid_column_folder):
                create_folder_with_permissions(grid_column_folder)

            # The station info file is not part of the scenario series.
            dir_list.remove(main_file)

            result_ok, rainfall_data = self.weather_writer.join_csv_files(
                dir_list,
                grid_column_folder,
                extract_rainfall=extract_rainfall,
                forecast_date=forecast.forecast_date,
                station_data=station_info)

            if extract_rainfall:
                forecast.rainfall[str(location)] = rainfall_data

            if result_ok:
                # The CSV working folder is no longer needed once joined.
                shutil.rmtree(output_path)
                forecast.weather_stations[location][
                    'weather_path'] = grid_column_folder
                forecast.weather_stations[location]['num_scenarios'] = len(
                    dir_list)
                forecast.weather_stations[location]['scen_names'] = [
                    CSVDatabaseWeatherSeries.__scen_name__(csv_file)
                    for csv_file in dir_list
                ]
            else:
                raise RuntimeError(
                    'Couldn\'t create weather file(s) in folder "%s".' %
                    grid_column_folder)
Ejemplo n.º 3
0
    ] * (len(soil_layers) - int(layers_half_idx))
    layers_no3 = [round(first_layer_ppm - nh4_ppm_first_half, 1)]

    for layer_idx in range(1, len(layers_nh4)):
        layers_no3.append(
            round(
                max(layers_no3[layer_idx - 1] / 2 - layers_nh4[layer_idx],
                    0.1), 1))

    return (layers_nh4, layers_no3)


soils_initial_values = {}

# for file_name in listdir_fullpath('./salado_soils'):
for file_name in listdir_fullpath('../data/soils/summer_soils'):
    if 'json' not in file_name:
        continue
    with open(file_name, mode='r') as f:
        soil_file = json.load(f)

    if file_name in n_kg:
        target_kilograms = n_kg[file_name]
    else:
        target_kilograms = n_kg['default']

    icnh4, icno3 = calculate_nitrogen(soil_file, target_kilograms,
                                      n_err_margin)
    soils_initial_values[os.path.basename(file_name)] = {
        'icnh4': icnh4,
        'icno3': icno3
Ejemplo n.º 4
0
    def load(config_object):
        """Load the system configuration into *config_object*.

        Reads the main YAML config, validates ``max_parallelism``, creates or
        updates the jobs lock, opens the configured database connections,
        starts the weather/yield DB checker threads, loads the alias keys file
        and scans the forecasts folder.

        :param config_object: DotDict-like configuration object updated in
            place with the keys found in the config files.
        :return: list of forecast YAML files that were not already present in
            ``config_object.forecasts_files``.
        :raises RuntimeError: on an invalid max_parallelism value, a missing
            database type, or an unsupported database type.
        """
        # Load system configuration from the YAML file and update this object's dictionary to add the keys found
        # in the config file, allowing us to access them using the dot notation (eg. "config.temp_folder" instead of
        # "config.get('temp_folder')", though this is also supported.
        with open(config_object.system_config_path) as system_config_file:
            system_config_yaml = DotDict(yaml.safe_load(system_config_file))

        if 'max_parallelism' in system_config_yaml:
            max_parallelism = system_config_yaml['max_parallelism']
            if (not isinstance(max_parallelism, int)) or (max_parallelism < 1):
                raise RuntimeError('Invalid max_parallelism value (%s).' %
                                   max_parallelism)

        config_object.system_config_yaml = system_config_yaml  # This will be used by forecasts to inherit the global config.
        # Update this class __dict__ property to add the properties defined in the config yaml.
        config_object.update(system_config_yaml)

        # Create the system's job syncing lock if it doesn't exist.
        if not config_object.jobs_lock:
            config_object.jobs_lock = JobsLock(
                max_parallel_tasks=config_object.max_parallelism)
        else:
            # Otherwise, just update the max concurrent parallel jobs. If we reinstantiate it we'll leave
            # every thread waiting for a lock permanently blocked.
            config_object.jobs_lock.max_concurrent_readers = config_object.max_parallelism

        # Load databases configurations and open connections.
        with open(config_object.databases_config_path) as databases_config_file:
            db_config = yaml.safe_load(databases_config_file)

        config_object['database_config'] = DotDict()
        config_object['database'] = DotDict()

        for db_conn, properties in db_config.items():
            if 'type' not in properties:
                raise RuntimeError(
                    'Missing database type for database connection "%s".' %
                    db_conn)

            properties['name'] = db_conn

            if properties['type'] == 'postgresql':
                connection = DatabaseUtils.connect_postgresql(
                    properties, config_object.config_path)
            elif properties['type'] == 'mongodb':
                connection = DatabaseUtils.connect_mongodb(
                    properties, config_object.config_path)
            else:
                raise RuntimeError('Unsupported database type: "%s".' %
                                   properties['type'])

            # Store connection config dictionary and connection instance.
            config_object['database_config'][db_conn] = properties
            config_object.database[db_conn] = connection

        # Start the threads that periodically check the weather and yield DBs.
        wth_db_checker = CheckWeatherDB(system_config=config_object)
        wth_db_checker.start()

        rinde_db_checker = CheckYieldDB(system_config=config_object)
        rinde_db_checker.start()

        config_object.alias_dict = None
        if config_object.alias_keys_path:
            with open(config_object.alias_keys_path, 'r') as alias_file:
                config_object.alias_dict = yaml.load(alias_file,
                                                     Loader=yaml.FullLoader)

        # Load forecasts.
        if not config_object.forecasts:
            config_object.forecasts = {}

        forecast_file_list = listdir_fullpath(
            config_object.forecasts_path,
            onlyFiles=True,
            recursive=True,
            filter=(lambda x: x.endswith('yaml')))

        if not config_object.forecasts_files:
            config_object.forecasts_files = forecast_file_list
        else:
            # If this property was already initialized, return only the files
            # that weren't added before. (The previous code removed items from
            # forecast_file_list while iterating over it, which silently skips
            # the element right after each removed one.)
            forecast_file_list = [
                forecast_file for forecast_file in forecast_file_list
                if forecast_file not in config_object.forecasts_files
            ]

        return forecast_file_list
Ejemplo n.º 5
0
    def run_forecast(self,
                     yield_forecast,
                     priority=RUN_FORECAST,
                     progress_monitor=None):
        """Run a yield forecast end to end.

        Deep-copies the forecast, creates the run folders, builds the weather
        series (one thread per weather station), inserts the forecast and its
        simulations in MongoDB, schedules reference simulations when needed,
        runs pSIMS and validates the results. On failure the inserted DB data
        is rolled back.

        :param yield_forecast: forecast definition to run; it is deep-copied,
            so the caller's object is not modified.
        :param priority: priority used to acquire the system jobs lock.
        :param progress_monitor: optional progress monitor; defaults to
            NullMonitor.
        :return: the pSIMS exit code (0 on success), 0 if the forecast was
            rescheduled because of outdated weather data, or -1 if an
            exception was raised or pSIMS failed.
        """
        forecast_full_name = '%s (%s)' % (yield_forecast.name,
                                          yield_forecast.forecast_date)
        logging.getLogger().info('Running forecast "%s".' % forecast_full_name)

        psims_exit_code = None
        db = None
        forecast_id = None
        simulations_ids = None
        exception_raised = False

        if not progress_monitor:
            # NOTE(review): NullMonitor is used without instantiation —
            # presumably its interface is class-level; confirm.
            progress_monitor = NullMonitor

        progress_monitor.end_value = 5
        progress_monitor.job_started()
        progress_monitor.update_progress(job_status=JOB_STATUS_WAITING)

        with self.system_config.jobs_lock.blocking_job(priority=priority):
            # Lock acquired.
            progress_monitor.update_progress(job_status=JOB_STATUS_RUNNING)

            forecast = copy.deepcopy(yield_forecast)
            try:
                run_start_time = datetime.now()

                # Get MongoDB connection.
                db = self.system_config.database['yield_db']

                # Add database connection information to the forecast config to use it when writing pSIMS params file.
                forecast.configuration.database = DotDict({
                    'name': db.name,
                    'host': db.client.HOST,
                    'port': db.client.PORT
                })

                forecast.configuration.weather_maker_class = ForecastLoader.weather_series_makers[
                    forecast.configuration.weather_series]

                # Create an instance of the weather series maker.
                wth_series_maker = forecast.configuration.weather_maker_class(
                    self.system_config, forecast.configuration.max_parallelism)
                # The simulations collection can be defined by the user in the YAML file.
                if 'simulation_collection' not in forecast.configuration:
                    # If it's not defined, base the decision of which one to use on the type of weather series
                    # the forecast will use.
                    forecast.configuration[
                        'simulation_collection'] = 'simulations'
                    if forecast.configuration.weather_series == 'historic':
                        forecast.configuration[
                            'simulation_collection'] = 'reference_simulations'
                    if forecast.configuration.weather_series == 'netcdf':
                        forecast.configuration[
                            'simulation_collection'] = 'netcdf_simulations'

                if forecast.configuration[
                        'simulation_collection'] not in db.collection_names():
                    raise RuntimeError(
                        'The specified collection (%s) does not exist in the results database.'
                        % forecast.configuration['simulation_collection'])

                # Use the run timestamp as the folder name, stripped of
                # characters that are unsafe in paths.
                folder_name = "%s" % (datetime.now().isoformat())
                folder_name = folder_name.replace('"', '').replace(
                    '\'', '').replace(' ', '_')
                forecast.folder_name = folder_name

                # Add folder name to rundir and create it.
                forecast.paths.rundir = os.path.abspath(
                    os.path.join(forecast.paths.rundir, folder_name))
                create_folder_with_permissions(forecast.paths.rundir)

                # Create a folder for the weather grid inside that rundir.
                forecast.paths.weather_grid_path = os.path.join(
                    forecast.paths.rundir, 'wth')
                create_folder_with_permissions(
                    forecast.paths.weather_grid_path)

                # Create a folder for the soil grid inside that rundir.
                forecast.paths.soil_grid_path = os.path.join(
                    forecast.paths.rundir, 'soils')
                create_folder_with_permissions(forecast.paths.soil_grid_path)

                # Create the folder where we'll read the CSV files created by the database.
                forecast.paths.wth_csv_read = os.path.join(
                    forecast.paths.wth_csv_read, folder_name)
                forecast.paths.wth_csv_export = os.path.join(
                    forecast.paths.wth_csv_export, folder_name)
                create_folder_with_permissions(forecast.paths.wth_csv_read)

                # One weather-series creation thread per weather station.
                active_threads = dict()

                forecast.weather_stations = {}
                forecast.rainfall = {}

                stations_not_updated = set()
                if forecast.forecast_date is None:
                    run_date = datetime.now().date()
                else:
                    run_date = datetime.strptime(forecast.forecast_date,
                                                 '%Y-%m-%d').date()

                for loc_key, location in forecast['locations'].items():
                    omm_id = location['weather_station']

                    # Upsert location.
                    db.locations.update_one(
                        {'_id': location.id},
                        {'$set': location.persistent_view()},
                        upsert=True)

                    # If this forecast is creating weather files from the weather database, check that the station
                    # associated with each location is currently updated.
                    if issubclass(wth_series_maker.__class__,
                                  DatabaseWeatherSeries):
                        if omm_id not in self.weather_updater.wth_max_date:
                            # Since the system only updates weather info for the stations that are currently being used,
                            # it may happen that the requested station is not in the weather updated max dates dict.
                            self.weather_updater.add_weather_station_id(omm_id)
                            stations_not_updated.add(omm_id)
                            continue
                        elif not isinstance(wth_series_maker, HistoricalSeriesMaker) and \
                                        self.weather_updater.wth_max_date[omm_id] < run_date:
                            # If the forecast date is greater than the max date of climate data for this station,
                            # we add it to the not updated set.
                            stations_not_updated.add(omm_id)
                            continue

                    if omm_id not in active_threads:
                        # Weather station data updated, forecast can be ran.
                        active_threads[omm_id] = threading.Thread(
                            target=wth_series_maker.create_series,
                            name='create_series for omm_id = %s' % omm_id,
                            args=(location, forecast))
                    else:
                        # Weather station already has an associated thread that will create the weather series.
                        continue

                if len(stations_not_updated) > 0:
                    # Forecast can't continue, must be rescheduled.
                    logging.warning(
                        "Couldn't run forecast \"%s\" because the following weather stations don't have "
                        "updated data: %s." %
                        (forecast_full_name, list(stations_not_updated)))
                    self.reschedule_forecast(forecast)
                    return 0

                progress_monitor.update_progress(new_value=1)

                weather_series_monitor = ProgressMonitor(
                    end_value=len(active_threads))
                progress_monitor.add_subjob(
                    weather_series_monitor,
                    job_name='Create weather series (%s)' %
                    forecast.configuration.weather_maker_class.__name__)
                joined_threads_count = 0

                # Start all weather maker threads.
                for t in list(active_threads.values()):
                    t.start()

                # Wait for the weather grid to be populated.
                for t in list(active_threads.values()):
                    t.join()
                    joined_threads_count += 1
                    weather_series_monitor.update_progress(
                        joined_threads_count)

                weather_series_monitor.job_ended()
                progress_monitor.update_progress(new_value=2)

                # If the folder is empty, delete it.
                if os.path.exists(forecast.paths.wth_csv_read) and len(
                        os.listdir(forecast.paths.wth_csv_read)) == 0:
                    # These folder are used only by classes in core.modules.simulations_manager.weather.csv
                    # The rest of the weather series makers use in-memory series creation.
                    shutil.rmtree(forecast.paths.wth_csv_read)

                forecast_persistent_view = forecast.persistent_view()
                is_reference_forecast = True
                if forecast_persistent_view:
                    is_reference_forecast = False
                    forecast_id = db.forecasts.insert_one(
                        forecast_persistent_view).inserted_id

                    if not forecast_id:
                        raise RuntimeError(
                            'Failed to insert forecast with id: %s' %
                            forecast_persistent_view['_id'])

                simulations_ids = []
                reference_ids = []

                # Flatten simulations and update location info (with id's and computed weather stations).
                for loc_key, loc_simulations in forecast.simulations.items():
                    for sim in loc_simulations:
                        sim.location = forecast.locations[loc_key]
                        sim.weather_station = forecast.weather_stations[
                            sim.location.weather_station]

                        # If a simulation has an associated forecast, fill the associated fields.
                        if forecast_id:
                            sim.forecast_id = forecast_id
                            sim.forecast_date = forecast.forecast_date
                            reference_ids.append(sim.reference_id)

                        sim_id = db[forecast.configuration[
                            'simulation_collection']].insert_one(
                                sim.persistent_view()).inserted_id
                        sim['_id'] = sim_id
                        simulations_ids.append(sim_id)

                if not is_reference_forecast:
                    # Find which simulations have a reference simulation associated.
                    found_reference_simulations = db.reference_simulations.find(
                        {'_id': {
                            '$in': reference_ids
                        }}, projection=['_id'])

                    found_reference_simulations = set(
                        [s['_id'] for s in found_reference_simulations])

                    diff = set(
                        reference_ids
                    ) - found_reference_simulations - self.scheduled_reference_simulations_ids
                    if len(diff) > 0:
                        # There are simulations that don't have a reference simulation calculated.
                        ref_forecast = copy.deepcopy(yield_forecast)
                        ref_forecast.name = 'Reference simulations for forecast %s' % forecast.name
                        ref_forecast.configuration.weather_series = 'historic'
                        ref_forecast.forecast_date = None

                        rm_locs = []

                        for loc_key, loc_simulations in ref_forecast.simulations.items(
                        ):
                            # Filter reference simulations.
                            loc_simulations[:] = [
                                x for x in loc_simulations
                                if x.reference_id in diff
                            ]

                            if len(loc_simulations) == 0:
                                rm_locs.append(loc_key)

                        for loc_key in rm_locs:
                            del ref_forecast.locations[loc_key]
                            del ref_forecast.simulations[loc_key]

                        self.schedule_forecast(ref_forecast,
                                               priority=RUN_REFERENCE_FORECAST)
                        self.scheduled_reference_simulations_ids |= diff
                        logging.info(
                            'Scheduled reference simulations for forecast: %s'
                            % forecast.name)
                else:
                    # Remove this reference forecasts id's.
                    self.scheduled_reference_simulations_ids -= set(
                        reference_ids)

                progress_monitor.update_progress(new_value=3)

                forecast.paths.run_script_path = CampaignWriter.write_campaign(
                    forecast, output_dir=forecast.paths.rundir)
                forecast.simulation_count = len(simulations_ids)

                progress_monitor.update_progress(new_value=4)

                # Insert the simulation IDs into the forecast document.
                if forecast_id:
                    db.forecasts.update_one(
                        {"_id": forecast_id},
                        {"$push": {
                            "simulations": {
                                "$each": simulations_ids
                            }
                        }})

                # Run simulations.
                weather_series_monitor = ProgressMonitor()
                progress_monitor.add_subjob(weather_series_monitor,
                                            job_name='Run pSIMS')
                psims_exit_code = self.psims_runner.run(
                    forecast,
                    progress_monitor=weather_series_monitor,
                    verbose=True)

                # Check results
                if psims_exit_code == 0:
                    inserted_simulations = db[
                        forecast.configuration['simulation_collection']].find(
                            {
                                '_id': {
                                    '$in': simulations_ids
                                },
                                # Find simulations that have results field (either cycle or daily).
                                # This property is created by the pSIMS Mongo hook so if a simulation doesn't have this
                                # field it means that the execution inside pSIMS failed.
                                '$or': [{
                                    'daily_results': {
                                        '$exists': True
                                    }
                                }, {
                                    'cycle_results': {
                                        '$exists': True
                                    }
                                }]
                            },
                            projection=[
                                'daily_results', 'cycle_results', 'name'
                            ])

                    # count() is a server round-trip; issue it only once
                    # instead of twice (check + error message).
                    inserted_count = inserted_simulations.count()
                    if len(simulations_ids) != inserted_count:
                        raise RuntimeError(
                            'Mismatch between simulations id\'s length and finished simulations '
                            'count (%s != %s)' %
                            (len(simulations_ids), inserted_count))

                    if 'HWAM' in forecast.results.cycle:
                        # Check that there are no -99 values in the crop yield.
                        for sim in inserted_simulations:
                            if 'cycle_results' not in sim:
                                continue
                            for scen_idx, scenario in enumerate(
                                    sim['cycle_results']['HWAM']['scenarios']):

                                if not (isinstance(scenario['value'], int) or
                                        isinstance(scenario['value'], float)):
                                    # Nested years inside the scenario.
                                    for year_index, v in enumerate(
                                            scenario['value']):
                                        if v['value'] < 0:
                                            raise RuntimeError(
                                                'Found a negative value for HWAM inside a simulation '
                                                '(%s, id = %s, scenario index = %d, year index = %d).'
                                                % (sim['name'], sim['_id'],
                                                   scen_idx, year_index))

                                elif scenario['value'] < 0:
                                    raise RuntimeError(
                                        'Found a negative value for HWAM inside a simulation (%s, '
                                        'id = %s, scenario index = %d).' %
                                        (sim['name'], sim['_id'], scen_idx))

                logging.getLogger().info(
                    'Finished running forecast "%s" (time=%s).\n' %
                    (forecast.name, datetime.now() - run_start_time))
            except Exception:
                # Catch Exception instead of a bare except so SystemExit and
                # KeyboardInterrupt are not swallowed here.
                logging.getLogger().error(
                    "Failed to run forecast '%s'. Reason: %s" %
                    (forecast.name, log_format_exception()))

                exception_raised = True
            finally:
                if exception_raised or psims_exit_code != 0:
                    # Roll back the forecast and simulation documents so a
                    # failed run leaves no partial data behind.
                    logging.info('Rolling back DB data for forecast "%s".' %
                                 forecast_full_name)
                    if db:
                        if simulations_ids and len(simulations_ids) > 0:
                            db[forecast.configuration[
                                'simulation_collection']].delete_many(
                                    {"_id": {
                                        "$in": simulations_ids
                                    }})
                        if forecast_id:
                            db.forecasts.delete_one({"_id": forecast_id})
                    return -1

                if not psims_exit_code or psims_exit_code == 0:
                    # Clean the rundir.
                    if os.path.exists(forecast.paths.rundir):
                        shutil.rmtree(forecast.paths.rundir)

                if psims_exit_code == 0:
                    # Clean pSIMS run folder. Raw string so '\d' is a regex
                    # class, not an (invalid) string escape.
                    rundir_regex = re.compile(r'.+/run(\d){3}$')
                    files_filter = lambda file_name: rundir_regex.match(
                        file_name) is not None

                    psims_run_dirs = sorted(listdir_fullpath(
                        forecast.paths.psims, filter=files_filter),
                                            reverse=True)

                    if len(psims_run_dirs) > 0:
                        # Remove the last runNNN directory (the one this execution created).
                        shutil.rmtree(psims_run_dirs[0])

                return psims_exit_code