Code example #1
def download_single_watershed_ecmwf_data(watershed,
                                         ecmwf_rapid_prediction_directory):
    """
    Loads a single watershed's ECMWF datasets from the data store
    """
    if ecmwf_rapid_prediction_directory \
        and os.path.exists(ecmwf_rapid_prediction_directory) \
        and watershed.ecmwf_data_store_watershed_name \
        and watershed.ecmwf_data_store_subbasin_name:
            
        #get data engine
        data_store = watershed.data_store
        if 'ckan' == data_store.data_store_type.code_name:
            #get dataset managers
            data_manager = ECMWFRAPIDDatasetManager(data_store.api_endpoint,
                                                    data_store.api_key)    
            #load current datasets
            data_manager.download_recent_resource(watershed.ecmwf_data_store_watershed_name, 
                                                  watershed.ecmwf_data_store_subbasin_name,
                                                  ecmwf_rapid_prediction_directory)
    
        path_to_predicitons = os.path.join(ecmwf_rapid_prediction_directory,
                                           "{0}-{1}".format(watershed.ecmwf_data_store_watershed_name, 
                                                            watershed.ecmwf_data_store_subbasin_name) 
                                           )

        if os.path.exists(path_to_predicitons):
            prediction_directories = sorted(os.listdir(path_to_predicitons), 
                                            reverse=True)[14:]
            #remove oldest datasets if more than 14 exist
            try:
                for prediction_directory in prediction_directories:
                    rmtree(os.path.join(path_to_predicitons, prediction_directory))
            except OSError:
                pass
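
The function above expects a watershed record shaped like the Streamflow Prediction Tool database model. A minimal invocation sketch, using hypothetical SimpleNamespace stand-ins and placeholder paths (none of these values come from the original code):

import os
from types import SimpleNamespace

# Hypothetical stand-in for the database-backed watershed record; only the
# attributes the function above reads are filled in, every value a placeholder.
watershed = SimpleNamespace(
    ecmwf_data_store_watershed_name='magdalena',
    ecmwf_data_store_subbasin_name='el_banco',
    data_store=SimpleNamespace(
        data_store_type=SimpleNamespace(code_name='ckan'),
        api_endpoint='https://ckan.example.com/api/3/action',
        api_key='REPLACE-WITH-API-KEY'))

prediction_dir = '/path/to/ecmwf_rapid_predictions'  # placeholder path
os.makedirs(prediction_dir, exist_ok=True)           # the directory must exist
download_single_watershed_ecmwf_data(watershed, prediction_dir)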
Code example #2
def download_single_watershed_ecmwf_data(watershed, 
                                         ecmwf_rapid_prediction_directory,
                                         app_instance_id):
    """
    Loads a single watershed's ECMWF datasets from the data store
    """
    if ecmwf_rapid_prediction_directory \
        and os.path.exists(ecmwf_rapid_prediction_directory) \
        and watershed.ecmwf_data_store_watershed_name \
        and watershed.ecmwf_data_store_subbasin_name:
            
        #get data engine
        data_store = watershed.data_store
        if 'ckan' == data_store.data_store_type.code_name:
            #get dataset managers
            data_manager = ECMWFRAPIDDatasetManager(data_store.api_endpoint,
                                                    data_store.api_key)    
            #load current datasets
            data_manager.download_recent_resource(watershed.ecmwf_data_store_watershed_name, 
                                                  watershed.ecmwf_data_store_subbasin_name,
                                                  ecmwf_rapid_prediction_directory)
    
        path_to_predicitons = os.path.join(ecmwf_rapid_prediction_directory,
                                           "{0}-{1}".format(watershed.ecmwf_data_store_watershed_name, 
                                                            watershed.ecmwf_data_store_subbasin_name) 
                                           )

        if os.path.exists(path_to_predicitons):
            prediction_directories = sorted(os.listdir(path_to_predicitons), 
                                            reverse=True)[14:]
            #remove oldest datasets if more than 14 exist
            try:
                for prediction_directory in prediction_directories:
                    rmtree(os.path.join(path_to_predicitons, prediction_directory))
            except OSError:
                pass
            
            if watershed.geoserver_id > 1 and app_instance_id and watershed.geoserver_search_for_flood_map:
                try:
                    #get geoserver engine
                    geoserver_manager = GeoServerDatasetManager(engine_url=watershed.geoserver.url,
                                                                username=watershed.geoserver.username,
                                                                password=watershed.geoserver.password,
                                                                app_instance_id=app_instance_id)
                    #remove old geoserver layers
                    flood_map_layer_name_beginning = "%s-%s-floodmap-" % (watershed.ecmwf_data_store_watershed_name,
                                                                          watershed.ecmwf_data_store_subbasin_name)
                    geoserver_directories = sorted(
                        [d for d in os.listdir(path_to_predicitons)
                         if os.path.isdir(os.path.join(path_to_predicitons, d))],
                        reverse=True)[7:]
                    for geoserver_directory in geoserver_directories:
                        layer_name = geoserver_manager.get_layer_name("%s%s" % (flood_map_layer_name_beginning, geoserver_directory))
                        print("Deleting geoserver layer group: {0}".format(layer_name))
                        #TODO: CHECK IF EXISTS BEFORE REMOVING
                        geoserver_manager.purge_remove_geoserver_layer_group(layer_name)
                except Exception as ex:
                    print(ex)
                    pass
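
This variant additionally prunes old GeoServer flood-map layer groups, so the watershed record needs GeoServer fields as well. A sketch of the extra attributes it reads, with every value a hypothetical placeholder:

from types import SimpleNamespace

# Hypothetical GeoServer-enabled watershed record for the function above.
watershed = SimpleNamespace(
    ecmwf_data_store_watershed_name='magdalena',
    ecmwf_data_store_subbasin_name='el_banco',
    data_store=SimpleNamespace(
        data_store_type=SimpleNamespace(code_name='ckan'),
        api_endpoint='https://ckan.example.com/api/3/action',
        api_key='REPLACE-WITH-API-KEY'),
    geoserver_id=2,                        # > 1 enables the layer cleanup branch
    geoserver_search_for_flood_map=True,
    geoserver=SimpleNamespace(
        url='https://geo.example.com/geoserver/rest',  # placeholder URL
        username='admin',
        password='REPLACE-ME'))

download_single_watershed_ecmwf_data(watershed,
                                     '/path/to/ecmwf_rapid_predictions',
                                     'REPLACE-WITH-APP-INSTANCE-ID')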
Code example #3
def _download_single_watershed_ecmwf_data(watershed,
                                          ecmwf_rapid_prediction_directory):
    """
    Loads a single watershed's ECMWF datasets from the data store
    """
    if ecmwf_rapid_prediction_directory \
            and os.path.exists(ecmwf_rapid_prediction_directory) \
            and watershed.ecmwf_data_store_watershed_name \
            and watershed.ecmwf_data_store_subbasin_name:

        # get data engine
        data_store = watershed.data_store
        if data_store.data_store_type.code_name == 'ckan':
            # get dataset managers
            data_manager = ECMWFRAPIDDatasetManager(data_store.api_endpoint,
                                                    data_store.api_key)
            # load current datasets
            data_manager.download_recent_resource(
                watershed.ecmwf_data_store_watershed_name,
                watershed.ecmwf_data_store_subbasin_name,
                ecmwf_rapid_prediction_directory
            )

        path_to_predicitons = \
            os.path.join(ecmwf_rapid_prediction_directory,
                         "{0}-{1}"
                         .format(watershed.ecmwf_data_store_watershed_name,
                                 watershed.ecmwf_data_store_subbasin_name)
                         )

        if os.path.exists(path_to_predicitons):
            prediction_directories = sorted(os.listdir(path_to_predicitons),
                                            reverse=True)[14:]
            # remove oldest datasets if more than 14 exist
            try:
                for prediction_directory in prediction_directories:
                    rmtree(os.path.join(path_to_predicitons,
                                        prediction_directory))
            except OSError:
                pass
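
The pruning logic in all of these variants is easy to misread: sorting the date-stamped folder names in reverse order and slicing from index 14 selects everything except the 14 newest entries, and that tail is what gets deleted. A standalone illustration with fabricated folder names:

# 16 fabricated forecast folders, two per day (00Z and 12Z runs)
dirs = ['2020082{0}.{1:02d}'.format(day, hour)
        for day in range(8) for hour in (0, 12)]
newest_first = sorted(dirs, reverse=True)
to_delete = newest_first[14:]  # everything past the 14 most recent
print(len(newest_first) - len(to_delete), len(to_delete))  # prints: 14 2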
Code example #4
def run_ecmwf_forecast_process(
        rapid_executable_location,  # path to RAPID executable
        rapid_io_files_location,  # path to RAPID input/output directory
        ecmwf_forecast_location,  # path to ECMWF forecasts
        subprocess_log_directory,  # path to store HTCondor/multiprocess logs
        main_log_directory,  # path to store main logs
        region="",  #1 of the 12 partitioned ECMWF files. Leave empty if using global,
        data_store_url="",  # CKAN API url
        data_store_api_key="",  # CKAN API Key,
        data_store_owner_org="",  # CKAN owner organization
        app_instance_id="",  # Streamflow Prediction tool instance ID
        sync_rapid_input_with_ckan=False,  # match Streamflow Prediction Tool RAPID input
        download_ecmwf=True,  # download recent ECMWF forecast before running
        date_string="",  # string of date of interest
        ftp_host="",  # ECMWF ftp site path
        ftp_login="",  # ECMWF ftp login name
        ftp_passwd="",  # ECMWF ftp password
        ftp_directory="",  # ECMWF ftp directory
        delete_past_ecmwf_forecasts=True,  # Deletes all past forecasts before next run
        upload_output_to_ckan=False,  # upload data to CKAN and remove local copy
        delete_output_when_done=False,  # delete all output data from this code
        initialize_flows=False,  # use forecast to initialize next run
        warning_flow_threshold=100,  # flows below this threshold will be ignored
        era_interim_data_location="",  # path to ERA Interim return period data
        create_warning_points=False,  # generate warning points for Streamflow Prediction Tool
        autoroute_executable_location="",  # location of AutoRoute executable
        autoroute_io_files_location="",  # path to AutoRoute input/outpuf directory
        geoserver_url="",  # url to API endpoint ending in geoserver/rest
        geoserver_username="",  # username for geoserver
        geoserver_password="",  # password for geoserver
        mp_mode='htcondor',  # valid options are htcondor and multiprocess
        mp_execute_directory="",  # required if using multiprocess mode
        initialization_time_step=12,  # time step of ECMWF Forecast Process, in hours
        #doesn't appear to be used MJS 8/23/2020... watersheds_with_dams_list=[], # a list of all watersheds where dam outflows are being forced
        #doesn't appear to be used, MJS 8/23/2020... stream_ids_with_dams_dict={}, # a dictionary with the watershed key and a value of a list of stream IDs where dams are located
        #doesn't appear to be used, MJS 8/23/2020... dam_outflows={} # a dictionary with the key as a stream ID and a value of a list of outflows
        BS_opt_dam=False,
        IS_dam_tot=0,
        IS_dam_use=0,
        dam_tot_id_file="",
        dam_use_id_file="",
        dam_file=""):
    """
    This is the main ECMWF RAPID forecast process
    """
    time_begin_all = datetime.datetime.utcnow()

    LOCAL_SCRIPTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
    LOCK_INFO_FILE = os.path.join(main_log_directory,
                                  "spt_compute_ecmwf_run_info_lock.txt")

    log_file_path = os.path.join(
        main_log_directory, "spt_compute_ecmwf_{0}.log".format(
            time_begin_all.strftime("%y%m%d%H%M%S")))

    with CaptureStdOutToLog(log_file_path):

        if not CONDOR_ENABLED and mp_mode == 'htcondor':
            raise ImportError(
                "condorpy is not installed. Please install condorpy to use the 'htcondor' option."
            )

        if not AUTOROUTE_ENABLED and autoroute_executable_location and autoroute_io_files_location:
            raise ImportError(
                "AutoRoute is not enabled. Please install tethys_dataset_services"
                " and AutoRoutePy to use the AutoRoute option.")

        if mp_mode == "multiprocess":
            if not mp_execute_directory or not os.path.exists(
                    mp_execute_directory):
                raise Exception(
                    "If mode is multiprocess, mp_execute_directory is required ..."
                )

        if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
            # sync with data store
            ri_manager = RAPIDInputDatasetManager(data_store_url,
                                                  data_store_api_key, 'ecmwf',
                                                  app_instance_id)
            ri_manager.sync_dataset(
                os.path.join(rapid_io_files_location, 'input'))

        # clean up old log files
        clean_logs(subprocess_log_directory,
                   main_log_directory,
                   log_file_path=log_file_path)

        data_manager = None
        if upload_output_to_ckan and data_store_url and data_store_api_key:
            if not SPT_DATASET_ENABLED:
                raise ImportError(
                    "spt_dataset_manager is not installed. "
                    "Please install spt_dataset_manager to use the 'ckan' options."
                )

            # init data manager for CKAN
            data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                    data_store_api_key,
                                                    data_store_owner_org)

        # get list of correctly formatted rapid input directories in rapid directory
        rapid_input_directories = get_valid_watershed_list(
            os.path.join(rapid_io_files_location, "input"))

        if download_ecmwf and ftp_host:
            # get list of folders to download
            ecmwf_folders = sorted(
                get_ftp_forecast_list(
                    'Runoff.%s*%s*.netcdf.tar*' % (date_string, region),
                    ftp_host, ftp_login, ftp_passwd, ftp_directory))
        else:
            # get list of folders to run
            ecmwf_folders = sorted(
                glob(
                    os.path.join(ecmwf_forecast_location,
                                 'Runoff.' + date_string + '*.netcdf')))

        # LOAD LOCK INFO FILE
        last_forecast_date = datetime.datetime.utcfromtimestamp(0)
        if os.path.exists(LOCK_INFO_FILE):
            with open(LOCK_INFO_FILE) as fp_lock_info:
                previous_lock_info = json.load(fp_lock_info)

            if previous_lock_info['running']:
                print("Another SPT ECMWF forecast process is running.\n"
                      "The lock file is located here: {0}\n"
                      "If this is an error, you have two options:\n"
                      "1) Delete the lock file.\n"
                      "2) Edit the lock file and set \"running\" to false. \n"
                      "Then, re-run this script. \n Exiting ...".format(
                          LOCK_INFO_FILE))
                return
            else:
                last_forecast_date = datetime.datetime.strptime(
                    previous_lock_info['last_forecast_date'], '%Y%m%d%H')
                run_ecmwf_folders = []
                for ecmwf_folder in ecmwf_folders:
                    # get date
                    forecast_date = get_datetime_from_forecast_folder(
                        ecmwf_folder)
                    # if more recent, add to list
                    # check to determine if forecast time step is 12 or 24 hours
                    if initialization_time_step == 24:
                        if forecast_date > last_forecast_date and forecast_date.hour != 12:
                            run_ecmwf_folders.append(ecmwf_folder)
                    elif initialization_time_step == 12:
                        if forecast_date > last_forecast_date:
                            run_ecmwf_folders.append(ecmwf_folder)
                ecmwf_folders = run_ecmwf_folders

        if not ecmwf_folders:
            print("No new forecasts found to run. Exiting ...")
            return

        # GENERATE NEW LOCK INFO FILE
        update_lock_info_file(LOCK_INFO_FILE, True,
                              last_forecast_date.strftime('%Y%m%d%H'))
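        # Note: the lock file is a small JSON document with "running" and
        # "last_forecast_date" (format YYYYMMDDHH) keys; setting "running" to
        # true here makes a second invocation started while this one runs
        # print a warning and exit at the lock check above.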

        # rapid_input_directories_sub = [rapid_input_directory for rapid_input_directory in rapid_input_directories if hydroshed_index in rapid_input_directory]
        # Try/Except added for lock file
        try:
            # ADD SEASONAL INITIALIZATION WHERE APPLICABLE
            if initialize_flows:
                initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(
                    ecmwf_folders[0])
                seasonal_init_job_list = []
                for rapid_input_directory in rapid_input_directories:
                    seasonal_master_watershed_input_directory = os.path.join(
                        rapid_io_files_location, "input",
                        rapid_input_directory)
                    # add seasonal initialization if no initialization file and historical Qout file exists
                    if era_interim_data_location and os.path.exists(
                            era_interim_data_location):
                        era_interim_watershed_directory = os.path.join(
                            era_interim_data_location, rapid_input_directory)
                        if os.path.exists(era_interim_watershed_directory):
                            # INITIALIZE FROM SEASONAL AVERAGE FILE
                            seasonal_streamflow_file = glob(
                                os.path.join(era_interim_watershed_directory,
                                             "seasonal_average*.nc"))
                            if seasonal_streamflow_file:
                                seasonal_init_job_list.append(
                                    (seasonal_streamflow_file[0],
                                     seasonal_master_watershed_input_directory,
                                     initial_forecast_date_timestep,
                                     "seasonal_average_file"))
                            else:
                                # INITIALIZE FROM HISTORICAL STREAMFLOW FILE
                                historical_qout_file = glob(
                                    os.path.join(
                                        era_interim_watershed_directory,
                                        "Qout*.nc"))
                                if historical_qout_file:
                                    seasonal_init_job_list.append((
                                        historical_qout_file[0],
                                        seasonal_master_watershed_input_directory,
                                        initial_forecast_date_timestep,
                                        "historical_streamflow_file"))
                if seasonal_init_job_list:
                    # use multiprocessing instead of htcondor due to potential for huge file sizes
                    if len(seasonal_init_job_list) > 1:
                        seasonal_pool = mp_Pool()
                        seasonal_pool.imap(
                            compute_seasonal_initial_rapid_flows_multicore_worker,
                            seasonal_init_job_list,
                            chunksize=1)
                        seasonal_pool.close()
                        seasonal_pool.join()
                    else:
                        compute_seasonal_initial_rapid_flows_multicore_worker(
                            seasonal_init_job_list[0])
            # ----------------------------------------------------------------------
            # BEGIN ECMWF-RAPID FORECAST LOOP
            # ----------------------------------------------------------------------
            master_job_info_list = []
            for ecmwf_folder in ecmwf_folders:
                if download_ecmwf:
                    # download forecast
                    ecmwf_folder = download_and_extract_ftp(
                        ecmwf_forecast_location, ecmwf_folder, ftp_host,
                        ftp_login, ftp_passwd, ftp_directory,
                        delete_past_ecmwf_forecasts)

                # get list of forecast files
                ecmwf_forecasts = glob(
                    os.path.join(ecmwf_folder, '*.runoff.%s*nc' % region))

                # look for old version of forecasts
                if not ecmwf_forecasts:
                    ecmwf_forecasts = glob(os.path.join(ecmwf_folder, 'full_*.runoff.netcdf')) + \
                                      glob(os.path.join(ecmwf_folder, '*.52.205.*.runoff.netcdf'))

                if not ecmwf_forecasts:
                    print("ERROR: Forecasts not found in folder. Exiting ...")
                    update_lock_info_file(
                        LOCK_INFO_FILE, False,
                        last_forecast_date.strftime('%Y%m%d%H'))
                    return

                # process the largest files first
                ecmwf_forecasts.sort(key=os.path.getsize, reverse=True)

                forecast_date_timestep = get_date_timestep_from_forecast_folder(
                    ecmwf_folder)
                print("Running ECMWF Forecast: {0}".format(
                    forecast_date_timestep))

                # submit jobs to downsize ecmwf files to watershed
                rapid_watershed_jobs = {}
                for rapid_input_directory in rapid_input_directories:

                    # keep list of jobs
                    rapid_watershed_jobs[rapid_input_directory] = {
                        'jobs': [],
                        'jobs_info': []
                    }
                    print("Running forecasts for: {0} {1}".format(
                        rapid_input_directory, os.path.basename(ecmwf_folder)))

                    watershed, subbasin = get_watershed_subbasin_from_folder(
                        rapid_input_directory)
                    master_watershed_input_directory = os.path.join(
                        rapid_io_files_location, "input",
                        rapid_input_directory)
                    master_watershed_outflow_directory = os.path.join(
                        rapid_io_files_location, 'output',
                        rapid_input_directory, forecast_date_timestep)
                    try:
                        os.makedirs(master_watershed_outflow_directory)
                    except OSError:
                        pass

                    # initialize HTCondor/multiprocess Logging Directory
                    subprocess_forecast_log_dir = os.path.join(
                        subprocess_log_directory, forecast_date_timestep)
                    try:
                        os.makedirs(subprocess_forecast_log_dir)
                    except OSError:
                        pass

                    # add USGS gage data to initialization file
                    if initialize_flows:
                        # update initial flows with USGS data
                        update_inital_flows_usgs(
                            master_watershed_input_directory,
                            forecast_date_timestep)

                    # create jobs for HTCondor/multiprocess
                    for watershed_job_index, forecast in enumerate(
                            ecmwf_forecasts):
                        ensemble_number = get_ensemble_number_from_forecast(
                            forecast)

                        # get basin names
                        outflow_file_name = 'Qout_%s_%s_%s.nc' % (
                            watershed.lower(), subbasin.lower(),
                            ensemble_number)
                        node_rapid_outflow_file = outflow_file_name
                        master_rapid_outflow_file = os.path.join(
                            master_watershed_outflow_directory,
                            outflow_file_name)

                        job_name = 'job_%s_%s_%s_%s' % (forecast_date_timestep,
                                                        watershed, subbasin,
                                                        ensemble_number)

                        rapid_watershed_jobs[rapid_input_directory][
                            'jobs_info'].append({
                                'watershed':
                                watershed,
                                'subbasin':
                                subbasin,
                                'outflow_file_name':
                                master_rapid_outflow_file,
                                'forecast_date_timestep':
                                forecast_date_timestep,
                                'ensemble_number':
                                ensemble_number,
                                'master_watershed_outflow_directory':
                                master_watershed_outflow_directory,
                                'data_manager':
                                data_manager  # added this to try to upload forecast in mp
                            })
                        if mp_mode == "htcondor":
                            # create job to downscale forecasts for watershed
                            job = CJob(job_name, tmplt.vanilla_transfer_files)
                            job.set(
                                'executable',
                                os.path.join(LOCAL_SCRIPTS_DIRECTORY,
                                             'htcondor_ecmwf_rapid.py'))
                            job.set(
                                'transfer_input_files', "%s, %s, %s" %
                                (forecast, master_watershed_input_directory,
                                 LOCAL_SCRIPTS_DIRECTORY))
                            job.set('initialdir', subprocess_forecast_log_dir)
                            job.set(
                                'arguments', '%s %s %s %s %s %s' %
                                (forecast, forecast_date_timestep,
                                 watershed.lower(), subbasin.lower(),
                                 rapid_executable_location, initialize_flows))
                            job.set(
                                'transfer_output_remaps',
                                "\"%s = %s\"" % (node_rapid_outflow_file,
                                                 master_rapid_outflow_file))
                            job.submit()
                            rapid_watershed_jobs[rapid_input_directory][
                                'jobs'].append(job)
                        elif mp_mode == "multiprocess":
                            rapid_watershed_jobs[rapid_input_directory][
                                'jobs'].append((
                                    forecast,
                                    forecast_date_timestep,
                                    watershed.lower(),
                                    subbasin.lower(),
                                    rapid_executable_location,
                                    initialize_flows,
                                    job_name,
                                    master_rapid_outflow_file,
                                    master_watershed_input_directory,
                                    mp_execute_directory,
                                    subprocess_forecast_log_dir,
                                    watershed_job_index,
                                    initialization_time_step,
                                    # dam arguments included, MJS 8/23/2020 ........
                                    BS_opt_dam,
                                    IS_dam_tot,
                                    IS_dam_use,
                                    dam_tot_id_file,
                                    dam_use_id_file,
                                    dam_file))
                            # COMMENTED CODE FOR DEBUGGING SERIALLY
                            ##                    run_ecmwf_rapid_multiprocess_worker((forecast,
                            ##                                                         forecast_date_timestep,
                            ##                                                         watershed.lower(),
                            ##                                                         subbasin.lower(),
                            ##                                                         rapid_executable_location,
                            ##                                                         initialize_flows,
                            ##                                                         job_name,
                            ##                                                         master_rapid_outflow_file,
                            ##                                                         master_watershed_input_directory,
                            ##                                                         mp_execute_directory,
                            ##                                                         subprocess_forecast_log_dir,
                            ##                                                         watershed_job_index,
                            ##                                                         initialization_time_step))
                        else:
                            raise Exception(
                                "ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ..."
                            )

                for rapid_input_directory, watershed_job_info \
                        in rapid_watershed_jobs.items():

                    # add sub job list to master job list
                    master_job_info_list = master_job_info_list + watershed_job_info[
                        'jobs_info']
                    if mp_mode == "htcondor":
                        # wait for jobs to finish then upload files
                        for job_index, job in enumerate(
                                watershed_job_info['jobs']):
                            job.wait()
                            # upload file when done
                            if data_manager:
                                upload_single_forecast(
                                    watershed_job_info['jobs_info'][job_index],
                                    data_manager)

                    elif mp_mode == "multiprocess":
                        pool_main = mp_Pool()
                        func = partial(run_ecmwf_rapid_multiprocess_worker,
                                       watershed_job_info['jobs_info'])
                        multiprocess_worker_list = pool_main.imap_unordered(
                            func,
                            watershed_job_info['jobs'],
                            chunksize=1)
                        if data_manager:
                            for multi_job_index in multiprocess_worker_list:
                                # upload file when done
                                upload_single_forecast(
                                    watershed_job_info['jobs_info']
                                    [multi_job_index], data_manager)

                        # just in case ...
                        pool_main.close()
                        pool_main.join()

                    # when all jobs in watershed are done, generate warning points
                    if create_warning_points:
                        watershed, subbasin = get_watershed_subbasin_from_folder(
                            rapid_input_directory)
                        forecast_directory = os.path.join(
                            rapid_io_files_location, 'output',
                            rapid_input_directory, forecast_date_timestep)

                        era_interim_watershed_directory = os.path.join(
                            era_interim_data_location, rapid_input_directory)
                        if os.path.exists(era_interim_watershed_directory):
                            print(
                                "Generating warning points for {0}-{1} from {2}"
                                .format(watershed, subbasin,
                                        forecast_date_timestep))
                            era_interim_files = glob(
                                os.path.join(era_interim_watershed_directory,
                                             "return_period*.nc"))
                            if era_interim_files:
                                try:
                                    generate_ecmwf_warning_points(
                                        forecast_directory,
                                        era_interim_files[0],
                                        forecast_directory,
                                        threshold=warning_flow_threshold)
                                    if upload_output_to_ckan and data_store_url and data_store_api_key:
                                        data_manager.initialize_run_ecmwf(
                                            watershed, subbasin,
                                            forecast_date_timestep)
                                        data_manager.zip_upload_warning_points_in_directory(
                                            forecast_directory)
                                except Exception as ex:
                                    print(ex)
                                    pass
                            else:
                                print(
                                    "No ERA Interim file found. Skipping ...")
                        else:
                            print(
                                "No ERA Interim directory found for {0}. "
                                "Skipping warning point generation...".format(
                                    rapid_input_directory))

                # initialize flows for next run
                if initialize_flows:
                    # create new init flow files/generate warning point files
                    for rapid_input_directory in rapid_input_directories:
                        input_directory = os.path.join(rapid_io_files_location,
                                                       'input',
                                                       rapid_input_directory)
                        forecast_directory = os.path.join(
                            rapid_io_files_location, 'output',
                            rapid_input_directory, forecast_date_timestep)
                        if os.path.exists(forecast_directory):
                            # loop through all the rapid_namelist files in directory
                            watershed, subbasin = get_watershed_subbasin_from_folder(
                                rapid_input_directory)
                            if initialize_flows:
                                print(
                                    "Initializing flows for {0}-{1} from {2}".
                                    format(watershed, subbasin,
                                           forecast_date_timestep))
                                basin_files = find_current_rapid_output(
                                    forecast_directory, watershed, subbasin)
                                try:
                                    compute_initial_rapid_flows(
                                        basin_files, input_directory,
                                        forecast_date_timestep,
                                        initialization_time_step)
                                except Exception as ex:
                                    print(ex)
                                    pass

                # run autoroute process if added
                if autoroute_executable_location and autoroute_io_files_location:
                    # run autoroute on all of the watersheds
                    run_autorapid_process(autoroute_executable_location,
                                          autoroute_io_files_location,
                                          rapid_io_files_location,
                                          forecast_date_timestep,
                                          subprocess_forecast_log_dir,
                                          geoserver_url, geoserver_username,
                                          geoserver_password, app_instance_id)

                last_forecast_date = get_datetime_from_date_timestep(
                    forecast_date_timestep)

                # update lock info file with next forecast
                update_lock_info_file(LOCK_INFO_FILE, True,
                                      last_forecast_date.strftime('%Y%m%d%H'))

                # ----------------------------------------------------------------------
                # END FORECAST LOOP
                # ----------------------------------------------------------------------
        except Exception as ex:
            print_exc()
            print(ex)
            pass

        # Release & update lock info file with all completed forecasts
        update_lock_info_file(LOCK_INFO_FILE, False,
                              last_forecast_date.strftime('%Y%m%d%H'))

        if delete_output_when_done:
            # delete local datasets
            for job_info in master_job_info_list:
                try:
                    rmtree(job_info['master_watershed_outflow_directory'])
                except OSError:
                    pass
            # delete watershed folder if empty
            for item in os.listdir(
                    os.path.join(rapid_io_files_location, 'output')):
                try:
                    os.rmdir(
                        os.path.join(rapid_io_files_location, 'output', item))
                except OSError:
                    pass

        # print info to user
        time_end = datetime.datetime.utcnow()
        print("Time Begin: {0}".format(time_begin_all))
        print("Time Finish: {0}".format(time_end))
        print("TOTAL TIME: {0}".format(time_end - time_begin_all))
Code example #5
def download_single_watershed_ecmwf_data(watershed,
                                         ecmwf_rapid_prediction_directory,
                                         app_instance_id):
    """
    Loads a single watershed's ECMWF datasets from the data store
    """
    if ecmwf_rapid_prediction_directory \
        and os.path.exists(ecmwf_rapid_prediction_directory) \
        and watershed.ecmwf_data_store_watershed_name \
        and watershed.ecmwf_data_store_subbasin_name:

        #get data engine
        data_store = watershed.data_store
        if 'ckan' == data_store.data_store_type.code_name:
            #get dataset managers
            data_manager = ECMWFRAPIDDatasetManager(data_store.api_endpoint,
                                                    data_store.api_key)
            #load current datasets
            data_manager.download_recent_resource(
                watershed.ecmwf_data_store_watershed_name,
                watershed.ecmwf_data_store_subbasin_name,
                ecmwf_rapid_prediction_directory)

        path_to_predicitons = os.path.join(
            ecmwf_rapid_prediction_directory,
            "{0}-{1}".format(watershed.ecmwf_data_store_watershed_name,
                             watershed.ecmwf_data_store_subbasin_name))

        if os.path.exists(path_to_predicitons):
            prediction_directories = sorted(os.listdir(path_to_predicitons),
                                            reverse=True)[14:]
            #remove oldest datasets if more than 14 exist
            try:
                for prediction_directory in prediction_directories:
                    rmtree(
                        os.path.join(path_to_predicitons,
                                     prediction_directory))
            except OSError:
                pass

            if watershed.geoserver_id > 1 and app_instance_id and watershed.geoserver_search_for_flood_map:
                try:
                    #get geoserver engine
                    geoserver_manager = GeoServerDatasetManager(
                        engine_url=watershed.geoserver.url,
                        username=watershed.geoserver.username,
                        password=watershed.geoserver.password,
                        app_instance_id=app_instance_id)
                    #remove old geoserver layers
                    flood_map_layer_name_beginning = "%s-%s-floodmap-" % (
                        watershed.ecmwf_data_store_watershed_name,
                        watershed.ecmwf_data_store_subbasin_name)
                    geoserver_directories = sorted(
                        [d for d in os.listdir(path_to_predicitons)
                         if os.path.isdir(os.path.join(path_to_predicitons, d))],
                        reverse=True)[7:]
                    for geoserver_directory in geoserver_directories:
                        layer_name = geoserver_manager.get_layer_name(
                            "%s%s" % (flood_map_layer_name_beginning,
                                      geoserver_directory))
                        print("Deleting geoserver layer group: {0}".format(
                            layer_name))
                        #TODO: CHECK IF EXISTS BEFORE REMOVING
                        geoserver_manager.purge_remove_geoserver_layer_group(
                            layer_name)
                except Exception as ex:
                    print(ex)
                    pass
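
Note the asymmetric retention in this variant: the prediction directories keep the 14 newest forecast folders, while the GeoServer flood-map layer groups keep only the 7 newest; and, per the TODO above, layer groups are purged without first checking that they exist.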
Code example #6
def run_ecmwf_rapid_process(rapid_executable_location, #path to RAPID executable
                            rapid_io_files_location, #path to RAPID input/output directory
                            ecmwf_forecast_location, #path to ECMWF forecasts
                            subprocess_log_directory, #path to store HTCondor/multiprocess logs
                            main_log_directory, #path to store main logs
                            data_store_url="", #CKAN API url
                            data_store_api_key="", #CKAN API Key,
                            data_store_owner_org="", #CKAN owner organization
                            app_instance_id="", #Streamflow Prediction tool instance ID
                            sync_rapid_input_with_ckan=False, #match Streamflow Prediction Tool RAPID input
                            download_ecmwf=True, #Download recent ECMWF forecast before running
                            date_string=None, #string of date of interest
                            ftp_host="", #ECMWF ftp site path
                            ftp_login="", #ECMWF ftp login name
                            ftp_passwd="", #ECMWF ftp password
                            ftp_directory="", #ECMWF ftp directory
                            upload_output_to_ckan=False, #upload data to CKAN and remove local copy
                            delete_output_when_done=False, #delete all output data from this code
                            initialize_flows=False, #use forecast to initialize next run
                            era_interim_data_location="", #path to ERA Interim return period data 
                            create_warning_points=False, #generate warning points for Streamflow Prediction Tool
                            autoroute_executable_location="", #location of AutoRoute executable
                            autoroute_io_files_location="", #path to AutoRoute input/outpuf directory
                            geoserver_url='', #url to API endpoint ending in geoserver/rest
                            geoserver_username='', #username for geoserver
                            geoserver_password='', #password for geoserver
                            mp_mode='htcondor', #valid options are htcondor and multiprocess
                            mp_execute_directory='',#required if using multiprocess mode
                            ):
    """
    This is the main ECMWF RAPID process
    """
    time_begin_all = datetime.datetime.utcnow()
    if date_string is None:
        date_string = time_begin_all.strftime('%Y%m%d')

    if mp_mode == "multiprocess":
        if not mp_execute_directory or not os.path.exists(mp_execute_directory):
            raise Exception("If mode is multiprocess, mp_execute_directory is required ...")
            
    #date_string = datetime.datetime(2016,2,12).strftime('%Y%m%d')
    local_scripts_location = os.path.dirname(os.path.realpath(__file__))

    if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
        #sync with data store
        ri_manager = RAPIDInputDatasetManager(data_store_url,
                                              data_store_api_key,
                                              'ecmwf',
                                              app_instance_id)
        ri_manager.sync_dataset(os.path.join(rapid_io_files_location,'input'))

    #clean up old log files
    clean_logs(subprocess_log_directory, main_log_directory)

    #get list of correctly formatted rapid input directories in rapid directory
    rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input"))
    
    if download_ecmwf and ftp_host:
        #download all files for today
        ecmwf_folders = sorted(download_all_ftp(ecmwf_forecast_location,
                                                'Runoff.%s*.netcdf.tar*' % date_string,
                                                ftp_host,
                                                ftp_login,
                                                ftp_passwd,
                                                ftp_directory))
    else:
        ecmwf_folders = sorted(glob(os.path.join(ecmwf_forecast_location,
                                                 'Runoff.'+date_string+'*.netcdf')))
    data_manager = None
    if upload_output_to_ckan and data_store_url and data_store_api_key:
        #init data manager for CKAN
        data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                data_store_api_key,
                                                data_store_owner_org)

    #ADD SEASONAL INITIALIZATION WHERE APPLICABLE
    if initialize_flows:
        initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folders[0])
        seasonal_init_job_list = []
        for rapid_input_directory in rapid_input_directories:
            seasonal_master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory)
            #add seasonal initialization if no initialization file and historical Qout file exists
            if era_interim_data_location and os.path.exists(era_interim_data_location):
                era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory)
                if os.path.exists(era_interim_watershed_directory):
                    historical_qout_file = glob(os.path.join(era_interim_watershed_directory, "Qout*.nc"))
                    if historical_qout_file:
                        seasonal_init_job_list.append((historical_qout_file[0], 
                                                       seasonal_master_watershed_input_directory,
                                                       initial_forecast_date_timestep))
        if seasonal_init_job_list:
            #use multiprocessing instead of htcondor due to potential for huge file sizes
            if len(seasonal_init_job_list) > 1:
                seasonal_pool = mp_Pool()
                seasonal_pool.imap(compute_seasonal_initial_rapid_flows_multicore_worker,
                                   seasonal_init_job_list,
                                   chunksize=1)
                seasonal_pool.close()
                seasonal_pool.join()
            else:
                compute_seasonal_initial_rapid_flows_multicore_worker(seasonal_init_job_list[0])

    #prepare ECMWF files
    master_job_info_list = []
    for ecmwf_folder in ecmwf_folders:
        ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'full_*.runoff.netcdf')) + \
                          glob(os.path.join(ecmwf_folder,'*.52.205.*.runoff.netcdf'))
        #look for new version of forecasts
        if not ecmwf_forecasts:
            ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'*.runoff.nc'))
            
        #process the largest files first
        ecmwf_forecasts.sort(key=os.path.getsize, reverse=True)

        forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folder)
        print(forecast_date_timestep)
        #submit jobs to downsize ecmwf files to watershed
        iteration = 0
        rapid_watershed_jobs = {}
        for rapid_input_directory in rapid_input_directories:
            #keep list of jobs
            rapid_watershed_jobs[rapid_input_directory] = {
                                                            'jobs': [], 
                                                            'jobs_info': []
                                                           }
            print "Running forecasts for:", rapid_input_directory, os.path.basename(ecmwf_folder)
            watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
            master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory)
            master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output',
                                                              rapid_input_directory, forecast_date_timestep)
            #add USGS gage data to initialization file
            if initialize_flows:
                #update initial flows with USGS data
                update_inital_flows_usgs(master_watershed_input_directory, 
                                         forecast_date_timestep)
            
            #create jobs for HTCondor
            for watershed_job_index, forecast in enumerate(ecmwf_forecasts):
                ensemble_number = get_ensemble_number_from_forecast(forecast)
                try:
                    os.makedirs(master_watershed_outflow_directory)
                except OSError:
                    pass

                #initialize HTCondor Logging Directory
                subprocess_forecast_log_dir = os.path.join(subprocess_log_directory, forecast_date_timestep)
                try:
                    os.makedirs(subprocess_forecast_log_dir)
                except OSError:
                    pass
                
                #get basin names
                outflow_file_name = 'Qout_%s_%s_%s.nc' % (watershed.lower(), subbasin.lower(), ensemble_number)
                node_rapid_outflow_file = outflow_file_name
                master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name)

                job_name = 'job_%s_%s_%s_%s_%s' % (forecast_date_timestep, watershed, subbasin, ensemble_number, iteration)
                if mp_mode == "htcondor":
                    #create job to downscale forecasts for watershed
                    job = CJob(job_name, tmplt.vanilla_transfer_files)
                    job.set('executable',os.path.join(local_scripts_location,'htcondor_ecmwf_rapid.py'))
                    job.set('transfer_input_files', "%s, %s, %s" % (forecast, master_watershed_input_directory, local_scripts_location))
                    job.set('initialdir', subprocess_forecast_log_dir)
                    job.set('arguments', '%s %s %s %s %s %s' % (forecast, forecast_date_timestep, watershed.lower(), subbasin.lower(),
                                                                rapid_executable_location, initialize_flows))
                    job.set('transfer_output_remaps',"\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file))
                    job.submit()
                    rapid_watershed_jobs[rapid_input_directory]['jobs'].append(job)
                    rapid_watershed_jobs[rapid_input_directory]['jobs_info'].append({'watershed' : watershed,
                                                                                     'subbasin' : subbasin,
                                                                                     'outflow_file_name' : master_rapid_outflow_file,
                                                                                     'forecast_date_timestep' : forecast_date_timestep,
                                                                                     'ensemble_number': ensemble_number,
                                                                                     'master_watershed_outflow_directory': master_watershed_outflow_directory,
                                                                                     })
                elif mp_mode == "multiprocess":
                    rapid_watershed_jobs[rapid_input_directory]['jobs'].append((forecast,
                                                                                forecast_date_timestep,
                                                                                watershed.lower(),
                                                                                subbasin.lower(),
                                                                                rapid_executable_location,
                                                                                initialize_flows,
                                                                                job_name,
                                                                                master_rapid_outflow_file,
                                                                                master_watershed_input_directory,
                                                                                mp_execute_directory,
                                                                                subprocess_forecast_log_dir,
                                                                                watershed_job_index))
##                    run_ecmwf_rapid_multiprocess_worker((forecast,
##                                                         forecast_date_timestep,
##                                                         watershed.lower(),
##                                                         subbasin.lower(),
##                                                         rapid_executable_location,
##                                                         initialize_flows,
##                                                         job_name,
##                                                         master_rapid_outflow_file,
##                                                         master_watershed_input_directory,
##                                                         mp_execute_directory,
##                                                         subprocess_forecast_log_dir,                     
##                                                         watershed_job_index))
                else:
                    raise Exception("ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ...")
                iteration += 1
        
        
        for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.items():
            #add sub job list to master job list
            master_job_info_list = master_job_info_list + watershed_job_info['jobs_info']
            if mp_mode == "htcondor":
                #wait for jobs to finish then upload files
                for job_index, job in enumerate(watershed_job_info['jobs']):
                    job.wait()
                    #upload file when done
                    if data_manager:
                        upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager)
                        
            elif mp_mode == "multiprocess":
                pool_main = mp_Pool()
                multiprocess_worker_list = pool_main.imap_unordered(run_ecmwf_rapid_multiprocess_worker, 
                                                                    watershed_job_info['jobs'], 
                                                                    chunksize=1)
                if data_manager:
                    for multi_job_output in multiprocess_worker_list:
                        job_index = multi_job_output[0]
                        #upload file when done
                        upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager)
                        
                #just in case ...
                pool_main.close()
                pool_main.join()

            #when all jobs in watershed are done, generate warning points
            if create_warning_points:
                watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
                forecast_directory = os.path.join(rapid_io_files_location, 
                                                  'output', 
                                                  rapid_input_directory, 
                                                  forecast_date_timestep)

                era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory)
                if os.path.exists(era_interim_watershed_directory):
                    print "Generating Warning Points for", watershed, subbasin, "from", forecast_date_timestep
                    era_interim_files = glob(os.path.join(era_interim_watershed_directory, "return_period*.nc"))
                    if era_interim_files:
                        try:
                            generate_warning_points(forecast_directory, era_interim_files[0], forecast_directory, threshold=10)
                            if upload_output_to_ckan and data_store_url and data_store_api_key:
                                data_manager.initialize_run_ecmwf(watershed, subbasin, forecast_date_timestep)
                                data_manager.zip_upload_warning_points_in_directory(forecast_directory)
                        except Exception as ex:
                            print(ex)
                            pass
                    else:
                        print "No ERA Interim file found. Skipping ..."
                else:
                    print "No ERA Interim directory found for", rapid_input_directory, ". Skipping warning point generation..."
            

        #initialize flows for next run
        if initialize_flows:
            #create new init flow files/generate warning point files
            for rapid_input_directory in rapid_input_directories:
                input_directory = os.path.join(rapid_io_files_location, 
                                               'input', 
                                               rapid_input_directory)
                forecast_directory = os.path.join(rapid_io_files_location, 
                                                  'output', 
                                                  rapid_input_directory, 
                                                  forecast_date_timestep)
                if os.path.exists(forecast_directory):
                    #loop through all the rapid_namelist files in directory
                    watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
                    if initialize_flows:
                        print "Initializing flows for", watershed, subbasin, "from", forecast_date_timestep
                        basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin)
                        try:
                            compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep)
                        except Exception as ex:
                            print(ex)
                            pass
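
A minimal call sketch for this older entry point, again with placeholder paths. The htcondor mode assumed here requires condorpy and a working HTCondor pool:

# Hypothetical driver for the legacy process; all paths are placeholders.
run_ecmwf_rapid_process(
    rapid_executable_location='/path/to/rapid/src/rapid',
    rapid_io_files_location='/path/to/rapid-io',
    ecmwf_forecast_location='/path/to/ecmwf',
    subprocess_log_directory='/path/to/logs/subprocess',
    main_log_directory='/path/to/logs/main',
    download_ecmwf=False,  # process forecasts already on disk
    mp_mode='htcondor')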