def calculate_ws_and_wd_from_u_and_v(ds):
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    U = pfp_utils.GetVariable(ds, "U")
    V = pfp_utils.GetVariable(ds, "V")
    Ws = pfp_utils.CreateEmptyVariable("Ws", nrecs, datetime=U["DateTime"])
    Wd = pfp_utils.CreateEmptyVariable("Wd", nrecs, datetime=U["DateTime"])
    # get the wind speed and direction from the components
    Wd["Data"] = float(270) - numpy.degrees(numpy.ma.arctan2(V["Data"], U["Data"]))
    Wd["Data"] = numpy.ma.mod(Wd["Data"], 360)
    Ws["Data"] = numpy.ma.sqrt(U["Data"]*U["Data"] + V["Data"]*V["Data"])
    # mask wind direction when the wind speed is less than 0.01
    Wd["Data"] = numpy.ma.masked_where(Ws["Data"] < 0.01, Wd["Data"])
    # now set the QC flag
    Ws["Flag"] = numpy.where(numpy.ma.getmaskarray(Ws["Data"]), ones, zeros)
    Wd["Flag"] = numpy.where(numpy.ma.getmaskarray(Wd["Data"]), ones, zeros)
    # update the variable attributes
    Ws["Attr"] = {"standard_name": "wind_speed", "long_name": "Wind speed",
                  "units": "m/s", "statistic_type": "average"}
    Wd["Attr"] = {"standard_name": "wind_from_direction", "long_name": "Wind direction",
                  "units": "degrees", "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Ws)
    pfp_utils.CreateVariable(ds, Wd)
    return
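# Illustrative sketch (not called anywhere in this module): the U/V -> Ws/Wd
# conversion used above, with made-up component values.  Wind direction
# follows the meteorological convention (the direction the wind blows FROM),
# hence the 270 - atan2(V, U) rotation from mathematical to compass
# coordinates.
def _example_ws_wd_from_u_v():
    import numpy
    U = numpy.ma.array([1.0, 0.0, -3.0])  # eastward component (m/s), made-up
    V = numpy.ma.array([0.0, 2.0, 0.0])   # northward component (m/s), made-up
    Ws = numpy.ma.sqrt(U*U + V*V)
    Wd = numpy.ma.mod(270.0 - numpy.degrees(numpy.ma.arctan2(V, U)), 360)
    print(Ws)  # [1.0 2.0 3.0]
    print(Wd)  # [270.0 180.0 90.0] i.e. from west, from south, from east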
def remove_duplicates(ds):
    """
    Remove duplicate timestamps, similar to Peter's solution in pfp_ts.py
    MergeDataStructures at L1
    """
    # get the datetime
    dtn = pfp_utils.GetVariable(ds, "DateTime")
    # remove duplicate timestamps
    dtn_unique, index_unique = numpy.unique(dtn["Data"], return_index=True)
    # restore the original order of the unique timestamps
    dtn_sorted = dtn_unique[numpy.argsort(index_unique)]
    # check to see if there were duplicates
    if len(dtn_sorted) < len(dtn["Data"]):
        n = len(dtn["Data"]) - len(dtn_sorted)
        msg = str(n) + " duplicate time stamps were removed for this ISD site"
        logger.warning(msg)
    nrecs = len(dtn_sorted)
    # note that DateTime is deliberately kept in the list of labels so that
    # it is reindexed along with the other variables
    labels = list(ds.series.keys())
    for label in labels:
        var1 = pfp_utils.CreateEmptyVariable(label, nrecs)
        varn = pfp_utils.GetVariable(ds, label)
        # keep the first occurrence of each duplicated timestamp
        var1["Data"] = varn["Data"][index_unique]
        var1["Flag"] = varn["Flag"][index_unique]
        var1["Attr"] = varn["Attr"]
        pfp_utils.CreateVariable(ds, var1)
    return ds
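# Illustrative sketch (not called anywhere in this module): how
# numpy.unique(..., return_index=True) drops duplicate timestamps while
# keeping the first occurrence of each, which is the mechanism
# remove_duplicates() relies on.  Values are made up.
def _example_remove_duplicate_timestamps():
    import numpy
    t = numpy.array(["2021-07-01T00:00", "2021-07-01T01:00",
                     "2021-07-01T01:00", "2021-07-01T02:00"],
                    dtype="datetime64[m]")
    x = numpy.array([1.0, 2.0, 99.0, 3.0])
    t_unique, index_unique = numpy.unique(t, return_index=True)
    print(t_unique)         # duplicates removed, in time order
    print(x[index_unique])  # [1.0 2.0 3.0], first occurrence kept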
def calculate_available_energy(ds):
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Fh = pfp_utils.GetVariable(ds, "Fh")
    Fe = pfp_utils.GetVariable(ds, "Fe")
    Fa = pfp_utils.CreateEmptyVariable("Fa", nrecs, datetime=Fh["DateTime"])
    # available energy is the sum of the sensible and latent heat fluxes
    Fa["Data"] = Fh["Data"] + Fe["Data"]
    Fa["Flag"] = numpy.where(numpy.ma.getmaskarray(Fa["Data"]), ones, zeros)
    Fa["Attr"] = {"long_name": "Available energy", "units": "W/m^2",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Fa)
    return
def calculate_net_radiation(ds):
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Fnsw = pfp_utils.GetVariable(ds, "Fnsw")
    Fnlw = pfp_utils.GetVariable(ds, "Fnlw")
    Fn = pfp_utils.CreateEmptyVariable("Fn", nrecs, datetime=Fnsw["DateTime"])
    # net radiation is the sum of the net shortwave and net longwave radiation
    Fn["Data"] = Fnsw["Data"] + Fnlw["Data"]
    Fn["Flag"] = numpy.where(numpy.ma.getmaskarray(Fn["Data"]), ones, zeros)
    Fn["Attr"] = {"standard_name": "surface_net_downward_radiative_flux",
                  "long_name": "Net radiation", "units": "W/m^2",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Fn)
    return
def calculate_upwelling_longwave_radiation(ds):
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Fld = pfp_utils.GetVariable(ds, "Fld")
    Fnlw = pfp_utils.GetVariable(ds, "Fnlw")
    Flu = pfp_utils.CreateEmptyVariable("Flu", nrecs, datetime=Fld["DateTime"])
    # up-welling longwave is down-welling longwave minus net longwave
    Flu["Data"] = Fld["Data"] - Fnlw["Data"]
    # flag the result so that masked elements of either input are caught
    Flu["Flag"] = numpy.where(numpy.ma.getmaskarray(Flu["Data"]), ones, zeros)
    Flu["Attr"] = {"standard_name": "surface_upwelling_longwave_flux_in_air",
                   "long_name": "Up-welling longwave radiation", "units": "W/m^2",
                   "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Flu)
    return
def calculate_specific_humidity(ds):
    # from relative humidity
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Ta = pfp_utils.GetVariable(ds, "Ta")
    ps = pfp_utils.GetVariable(ds, "ps")
    RH = pfp_utils.GetVariable(ds, "RH")
    SH = pfp_utils.CreateEmptyVariable("SH", nrecs, datetime=RH["DateTime"])
    SH["Data"] = pfp_mf.specifichumidityfromrelativehumidity(RH["Data"], Ta["Data"], ps["Data"])
    SH["Flag"] = numpy.where(numpy.ma.getmaskarray(SH["Data"]), ones, zeros)
    SH["Attr"] = {"standard_name": "specific_humidity",
                  "long_name": "Specific humidity", "units": "kg/kg",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, SH)
    return
def calculate_absolute_humidity(ds):
    # from relative humidity
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    RH = pfp_utils.GetVariable(ds, "RH")
    Ta = pfp_utils.GetVariable(ds, "Ta")
    AH = pfp_utils.CreateEmptyVariable("AH", nrecs, datetime=RH["DateTime"])
    AH["Data"] = pfp_mf.absolutehumidityfromrelativehumidity(Ta["Data"], RH["Data"])
    AH["Flag"] = numpy.where(numpy.ma.getmaskarray(AH["Data"]), ones, zeros)
    AH["Attr"] = {"standard_name": "mass_concentration_of_water_vapor_in_air",
                  "long_name": "Absolute humidity", "units": "g/m^3",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, AH)
    return
def calculate_relative_humidity(ds):
    # from dew point temperature
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Td = pfp_utils.GetVariable(ds, "Td")
    Ta = pfp_utils.GetVariable(ds, "Ta")
    RH = pfp_utils.CreateEmptyVariable("RH", nrecs, datetime=Td["DateTime"])
    RH["Data"] = pfp_mf.relativehumidityfromdewpoint(Td["Data"], Ta["Data"])
    RH["Flag"] = numpy.where(numpy.ma.getmaskarray(RH["Data"]), ones, zeros)
    RH["Attr"] = {"standard_name": "relative_humidity",
                  "long_name": "Relative humidity", "units": "percent",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, RH)
    return
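# Illustrative sketch (not called anywhere in this module): a dew point to
# relative humidity conversion written with the Magnus approximation.  This
# is an assumed formulation for illustration only, not a statement about how
# pfp_mf.relativehumidityfromdewpoint is actually implemented; the
# coefficients are the common Magnus-Tetens values.
def _example_rh_from_dewpoint():
    import numpy
    Ta = numpy.array([20.0, 30.0])  # air temperature (degC), made-up values
    Td = numpy.array([10.0, 30.0])  # dew point temperature (degC), made-up values
    a, b = 17.625, 243.04           # assumed Magnus coefficients
    e = 6.1094*numpy.exp(a*Td/(b + Td))    # actual vapour pressure (hPa)
    es = 6.1094*numpy.exp(a*Ta/(b + Ta))   # saturation vapour pressure (hPa)
    print(100.0*e/es)  # ~[52.5 100.0] percent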
def calculate_ground_heat_flux(ds):
    # as the residual of net radiation minus available energy
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Fn = pfp_utils.GetVariable(ds, "Fn")
    Fa = pfp_utils.GetVariable(ds, "Fa")
    Fg = pfp_utils.CreateEmptyVariable("Fg", nrecs, datetime=Fn["DateTime"])
    Fg["Data"] = Fn["Data"] - Fa["Data"]
    Fg["Flag"] = numpy.where(numpy.ma.getmaskarray(Fg["Data"]), ones, zeros)
    Fg["Attr"] = {"standard_name": "downward_heat_flux_in_soil",
                  "long_name": "Ground heat flux", "units": "W/m^2",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Fg)
    return
def calculate_ground_heat_flux_grid(ds):
    # 3x3 grid variant of calculate_ground_heat_flux(), operating on the
    # per-cell labels "Fn_ij" and "Fa_ij"; given a distinct name here so the
    # two definitions do not shadow each other
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    for i in range(3):
        for j in range(3):
            Fn = pfp_utils.GetVariable(ds, "Fn" + "_" + str(i) + str(j))
            Fa = pfp_utils.GetVariable(ds, "Fa" + "_" + str(i) + str(j))
            Fg = pfp_utils.CreateEmptyVariable("Fg" + "_" + str(i) + str(j),
                                               nrecs, datetime=Fn["DateTime"])
            Fg["Data"] = Fn["Data"] - Fa["Data"]
            Fg["Flag"] = numpy.where(numpy.ma.getmaskarray(Fg["Data"]), ones, zeros)
            Fg["Attr"] = {"standard_name": "downward_heat_flux_in_soil",
                          "long_name": "Ground heat flux", "units": "W/m^2",
                          "statistic_type": "average"}
            pfp_utils.CreateVariable(ds, Fg)
    return
def calculate_upwelling_shortwave_radiation(ds):
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    for i in range(3):
        for j in range(3):
            Fsd = pfp_utils.GetVariable(ds, "Fsd" + "_" + str(i) + str(j))
            Fnsw = pfp_utils.GetVariable(ds, "Fnsw" + "_" + str(i) + str(j))
            Fsu = pfp_utils.CreateEmptyVariable("Fsu" + "_" + str(i) + str(j),
                                                nrecs, datetime=Fsd["DateTime"])
            # up-welling shortwave is down-welling shortwave minus net shortwave
            Fsu["Data"] = Fsd["Data"] - Fnsw["Data"]
            # flag the result so that masked elements of either input are caught
            Fsu["Flag"] = numpy.where(numpy.ma.getmaskarray(Fsu["Data"]), ones, zeros)
            Fsu["Attr"] = {"standard_name": "surface_upwelling_shortwave_flux_in_air",
                           "long_name": "Up-welling shortwave radiation",
                           "units": "W/m^2", "statistic_type": "average"}
            pfp_utils.CreateVariable(ds, Fsu)
    return
def read_isd_file_csv(isd_file_path):
    """
    Purpose:
     Reads a NOAA ISD CSV file downloaded from
     https://www.ncei.noaa.gov/data/global-hourly/access/
     These files used to be field-formatted ASCII where the character position
     in a line of ASCII determined the data type.  Some time in 2020 or 2021,
     the old FFA format was replaced with CSV.
     The format of the old-style .gz files is described in
     https://www.ncei.noaa.gov/data/global-hourly/doc/isd-format-document.pdf
     This document still describes the data in the new CSV format.
    Usage:
    Side effects:
     Returns a PFP data structure with the data at the site time step.
    Author: PRI
    Date: July 2021
    """
    msg = " Reading " + isd_file_path
    logger.info(msg)
    # list of variables to read from the CSV file
    csv_labels = ["STATION", "DATE", "LATITUDE", "LONGITUDE", "ELEVATION",
                  "REPORT_TYPE", "QUALITY_CONTROL",
                  "WND", "TMP", "DEW", "SLP", "AA1", "AA2", "AA3", "AA4"]
    # read the CSV file
    df = pandas.read_csv(isd_file_path, delimiter=",", header=0)
    # remove items from csv_labels that are not in the data frame
    df_labels = df.columns.to_list()
    for csv_label in list(csv_labels):
        if csv_label not in df_labels:
            csv_labels.remove(csv_label)
    # keep only what we need
    df = df[csv_labels]
    # remove duplicate dates, keep the SYNOP (FM-12) reports
    # first, we find the duplicate dates
    df["Duplicates"] = df["DATE"].duplicated()
    # next, we drop rows with duplicate dates that are not SYNOP reports
    df = df.drop(df[(df["Duplicates"]) & (df["REPORT_TYPE"] != "FM-12")].index)
    # then check for duplicates again
    df["Duplicates"] = df["DATE"].duplicated()
    if df["Duplicates"].sum() != 0:
        msg = " Unable to remove all duplicate dates in file"
        logger.error(msg)
        raise ValueError(msg)
    # convert the date in the CSV file to a pandas datetime
    df["TIMESTAMP"] = pandas.to_datetime(df["DATE"].astype("string"), errors="raise")
    # find all of the timestamps (should only be 1)
    timestamps = list(df.select_dtypes(include=['datetime64']))
    # take the first if more than 1
    timestamp = timestamps[0]
    # use the timestamp as the index
    df.set_index(timestamp, inplace=True)
    df.index = df.index.round('1S')
    # wind speed and direction fields, see isd-format-document.pdf for details
    wind = df["WND"].str.split(',', expand=True)
    df["Wd"] = wind[0].apply(pandas.to_numeric, errors='coerce')
    df["Ws"] = wind[3].apply(pandas.to_numeric, errors='coerce')/float(10)
    del df["WND"]
    # air temperature
    temperature = df["TMP"].str.split(',', expand=True)
    df["Ta"] = temperature[0].apply(pandas.to_numeric, errors='coerce')/float(10)
    del df["TMP"]
    # dew point temperature
    dew_point = df["DEW"].str.split(',', expand=True)
    df["Td"] = dew_point[0].apply(pandas.to_numeric, errors='coerce')/float(10)
    del df["DEW"]
    # sea level pressure, converted to station pressure further down
    surface_pressure = df["SLP"].str.split(',', expand=True)
    df["ps"] = surface_pressure[0].apply(pandas.to_numeric, errors='coerce')/float(100)
    del df["SLP"]
    # Precipitation is stored in columns AA1 to AA4 but not all columns will
    # be present.
    # Within each column, precipitation is stored as "HH,PPPP,C,Q" where HH is
    # the period over which the precipitation was accumulated (e.g. 1, 3, 6,
    # 24 hours), PPPP is the precipitation amount in mm*10, C is the condition
    # code and Q is the QC flag (1 = passed all QC checks).
    # Column AA1 contains most of the precipitation data.  When precipitation
    # data is available for 2 accumulation periods e.g. 3 hours and 6 or 24
    # hours, the second accumulation period is given in AA2.  And so on for up
    # to 4 separate accumulation periods e.g. 1 hour, 3 hours, 6 hours and
    # 24 hours.
    # get a list of the precipitation columns in the data frame
    precip_labels = [l for l in df.columns.to_list() if "AA" in l]
    # create a data frame for the precipitation data, same index as main data frame
    df_precip = pandas.DataFrame(index=df.index)
    # loop over the precipitation fields
    for precip_label in precip_labels:
        # split the "HH,PPPP,C,Q" fields to get individual parts
        tmp = df[precip_label].str.split(',', expand=True)
        # name the columns
        tmp.columns = ["Period", "Amount", "Condition", "Quality"]
        # coerce to numeric values
        tmp = tmp.apply(pandas.to_numeric, errors='coerce')
        # loop over the accumulation periods
        for n in [1, 3, 6, 24]:
            # get the data for this accumulation period and store in a new
            # column e.g. "3_hourly_AA1"
            tmp.loc[(tmp["Period"] == n) & (tmp["Quality"] == 1),
                    str(n) + "_hourly_" + precip_label] = tmp["Amount"]
        # drop the intermediate columns, no longer needed
        tmp = tmp.drop(["Period", "Amount", "Condition", "Quality"], axis=1)
        # concatenate the new data
        df_precip = pandas.concat([df_precip, tmp], axis=1)
        # drop the individual columns e.g. AA1, AA2 etc
        df.drop(precip_label, axis=1, inplace=True)
    # now loop over the accumulation periods and combine to get a single
    # column for each accumulation period
    for n in [1, 3, 6, 24]:
        # list of column headings for this accumulation period
        label = str(n) + "_hourly"
        hour_labels = [l for l in df_precip.columns.to_list() if label in l]
        # rename the first column e.g. "3_hourly_AA1" to "3_hourly"
        df_precip.rename({hour_labels[0]: label}, axis=1, inplace=True)
        # loop over the remaining columns and merge into a single column for
        # this accumulation period
        for hour_label in hour_labels[1:]:
            # merge "3_hourly" with "3_hourly_AA2" etc
            df_precip[label] = df_precip[label].combine_first(df_precip[hour_label])
            # delete columns that are no longer needed
            df_precip.drop(hour_label, axis=1, inplace=True)
        # convert mm*10 to mm, once per accumulation period
        df_precip[label] = df_precip[label]/float(10)
    # print the sum of the 1, 3, 6 and 24 hourly accumulation periods
    # (we expect them to be equal)
    msg = " 1 hourly precipitation total is " + str(round(df_precip["1_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 3 hourly precipitation total is " + str(round(df_precip["3_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 6 hourly precipitation total is " + str(round(df_precip["6_hourly"].sum(), 4))
    logger.info(msg)
    msg = " 24 hourly precipitation total is " + str(round(df_precip["24_hourly"].sum(), 4))
    logger.info(msg)
    # choose the accumulation period with the most data
    msg = " Using " + df_precip.count().idxmax() + " for precipitation"
    logger.info(msg)
    # and use it for the precipitation data
    df["Precip"] = df_precip[df_precip.count().idxmax()]
    # now copy the data from a pandas data frame to a PFP data structure
    nrecs = len(df)
    ones = numpy.ones(nrecs)
    zeros = numpy.zeros(nrecs)
    # create a data structure
    ds_its = pfp_io.DataStructure()
    # set the global attributes
    ds_its.globalattributes["nc_nrecs"] = nrecs
    ds_its.globalattributes["altitude"] = float(df["ELEVATION"].iloc[0])
    ds_its.globalattributes["latitude"] = float(df["LATITUDE"].iloc[0])
    ds_its.globalattributes["longitude"] = float(df["LONGITUDE"].iloc[0])
    ds_its.globalattributes["isd_site_id"] = int(df["STATION"].iloc[0])
    # get the datetime variable
    ldt = pfp_utils.CreateEmptyVariable("DateTime", nrecs)
    ldt["Data"] = numpy.array(df.index.to_pydatetime())
    ldt["Flag"] = zeros
    ldt["Attr"] = {"long_name": "Datetime in UTC", "units": ""}
    pfp_utils.CreateVariable(ds_its, ldt)
    # get the time step
    dt = pfp_utils.get_timestep(ds_its)
    time_step = int(scipy.stats.mode(dt/float(60))[0][0])
    if time_step not in [10, 30, 60, 180]:
        msg = " Time step (" + str(time_step) + ") must be 10, 30, 60 or 180 minutes"
        logger.error(msg)
        raise ValueError(msg)
    else:
        ds_its.globalattributes["time_step"] = time_step
    # now add the other variables
    # wind direction
    Wd = pfp_utils.CreateEmptyVariable("Wd", nrecs, datetime=ldt["Data"])
    Wd["Data"] = numpy.ma.masked_equal(df["Wd"].values, 999)
    Wd["Flag"] = numpy.where(numpy.ma.getmaskarray(Wd["Data"]), ones, zeros)
    Wd["Attr"] = {"long_name": "Wind direction", "statistic_type": "average",
                  "standard_name": "wind_from_direction", "units": "degrees"}
    pfp_utils.CreateVariable(ds_its, Wd)
    # wind speed
    Ws = pfp_utils.CreateEmptyVariable("Ws", nrecs, datetime=ldt["Data"])
    Ws["Data"] = numpy.ma.masked_equal(df["Ws"].values, 999.9)
    Ws["Flag"] = numpy.where(numpy.ma.getmaskarray(Ws["Data"]), ones, zeros)
    Ws["Attr"] = {"long_name": "Wind speed", "statistic_type": "average",
                  "standard_name": "wind_speed", "units": "m/s"}
    pfp_utils.CreateVariable(ds_its, Ws)
    # air temperature
    Ta = pfp_utils.CreateEmptyVariable("Ta", nrecs, datetime=ldt["Data"])
    Ta["Data"] = numpy.ma.masked_equal(df["Ta"].values, 999.9)
    Ta["Flag"] = numpy.where(numpy.ma.getmaskarray(Ta["Data"]), ones, zeros)
    Ta["Attr"] = {"long_name": "Air temperature", "statistic_type": "average",
                  "standard_name": "air_temperature", "units": "degC"}
    pfp_utils.CreateVariable(ds_its, Ta)
    # dew point temperature
    Td = pfp_utils.CreateEmptyVariable("Td", nrecs, datetime=ldt["Data"])
    Td["Data"] = numpy.ma.masked_equal(df["Td"].values, 999.9)
    Td["Flag"] = numpy.where(numpy.ma.getmaskarray(Td["Data"]), ones, zeros)
    Td["Attr"] = {"long_name": "Dew point temperature", "statistic_type": "average",
                  "standard_name": "dew_point_temperature", "units": "degC"}
    pfp_utils.CreateVariable(ds_its, Td)
    # surface pressure
    # the ISD SLP field is sea level pressure, so correct it to station
    # pressure using the site altitude and the air temperature
    ps = pfp_utils.CreateEmptyVariable("ps", nrecs, datetime=ldt["Data"])
    site_altitude = float(ds_its.globalattributes["altitude"])
    cfac = numpy.ma.exp((-1*site_altitude)/((Ta["Data"] + 273.15)*29.263))
    ps["Data"] = numpy.ma.masked_equal(df["ps"].values, 9999.9)
    ps["Data"] = ps["Data"]*cfac
    ps["Flag"] = numpy.where(numpy.ma.getmaskarray(ps["Data"]), ones, zeros)
    ps["Attr"] = {"long_name": "Surface pressure", "statistic_type": "average",
                  "standard_name": "surface_air_pressure", "units": "kPa"}
    pfp_utils.CreateVariable(ds_its, ps)
    # precipitation
    Precip = pfp_utils.CreateEmptyVariable("Precip", nrecs, datetime=ldt["Data"])
    Precip["Data"] = numpy.ma.masked_equal(df["Precip"].values, 999.9)
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"]), ones, zeros)
    Precip["Attr"] = {"long_name": "Rainfall", "statistic_type": "sum",
                      "standard_name": "thickness_of_rainfall_amount", "units": "mm"}
    pfp_utils.CreateVariable(ds_its, Precip)
    # relative humidity
    RH = pfp_utils.CreateEmptyVariable("RH", nrecs, datetime=ldt["Data"])
    RH["Data"] = pfp_mf.relativehumidityfromdewpoint(Td["Data"], Ta["Data"])
    RH["Flag"] = numpy.where(numpy.ma.getmaskarray(RH["Data"]), ones, zeros)
    RH["Attr"] = {"long_name": "Relative humidity", "statistic_type": "average",
                  "standard_name": "relative_humidity", "units": "percent"}
    pfp_utils.CreateVariable(ds_its, RH)
    # absolute humidity
    AH = pfp_utils.CreateEmptyVariable("AH", nrecs, datetime=ldt["Data"])
    AH["Data"] = pfp_mf.absolutehumidityfromrelativehumidity(Ta["Data"], RH["Data"])
    AH["Flag"] = numpy.where(numpy.ma.getmaskarray(AH["Data"]), ones, zeros)
    AH["Attr"] = {"long_name": "Absolute humidity", "statistic_type": "average",
                  "standard_name": "mass_concentration_of_water_vapor_in_air",
                  "units": "g/m^3"}
    pfp_utils.CreateVariable(ds_its, AH)
    # specific humidity
    SH = pfp_utils.CreateEmptyVariable("SH", nrecs, datetime=ldt["Data"])
    SH["Data"] = pfp_mf.specifichumidityfromrelativehumidity(RH["Data"], Ta["Data"], ps["Data"])
    SH["Flag"] = numpy.where(numpy.ma.getmaskarray(SH["Data"]), ones, zeros)
    SH["Attr"] = {"long_name": "Specific humidity", "statistic_type": "average",
                  "standard_name": "specific_humidity", "units": "kg/kg"}
    pfp_utils.CreateVariable(ds_its, SH)
    return ds_its
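# Illustrative sketch (not called anywhere in this module): parsing a made-up
# ISD "AA1" precipitation field of the form "HH,PPPP,C,Q" (period in hours,
# amount in mm*10, condition code, QC flag) with the same split/coerce
# approach used in read_isd_file_csv() above.
def _example_parse_isd_precip_field():
    import pandas
    aa1 = pandas.Series(["03,0015,9,1", "03,9999,9,1", "06,0120,9,1"])
    tmp = aa1.str.split(',', expand=True)
    tmp.columns = ["Period", "Amount", "Condition", "Quality"]
    tmp = tmp.apply(pandas.to_numeric, errors='coerce')
    # keep 3 hourly totals that passed QC, convert mm*10 to mm
    precip_3hr = tmp.loc[(tmp["Period"] == 3) & (tmp["Quality"] == 1), "Amount"]/10.0
    print(precip_3hr)  # 1.5 mm, then 999.9 (the ISD missing value, masked later)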
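# Illustrative sketch (not called anywhere in this module): the sea level to
# station pressure correction used in read_isd_file_csv().  The constant
# 29.263 m/K is, to rounding, the dry air gas constant divided by gravity
# (Rd/g = 287.05/9.80665 ~ 29.27 m/K), so cfac = exp(-z/(T*Rd/g)) is the
# hypsometric scaling from sea level to the station altitude z.  Values below
# are made up.
def _example_station_pressure_from_slp():
    import numpy
    slp_kpa = 101.325   # sea level pressure (kPa)
    z = 500.0           # site altitude (m)
    Ta_degC = 15.0      # air temperature (degC)
    cfac = numpy.exp(-z/((Ta_degC + 273.15)*29.263))
    print(slp_kpa*cfac)  # ~95.5 kPa at 500 m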
    ldt_one = ds_out[i].series["DateTime"]["Data"]
    idx = numpy.searchsorted(ldt_all, numpy.intersect1d(ldt_all, ldt_one))
    idy = numpy.searchsorted(ldt_one, numpy.intersect1d(ldt_one, ldt_all))
    # then we get a list of the variables to copy
    series_list = list(ds_out[i].series.keys())
    # and remove the datetime
    if "DateTime" in series_list:
        series_list.remove("DateTime")
    # and then we loop over the variables to be copied
    for label in series_list:
        # append a number, unique to each ISD station, to the variable label
        all_label = label + "_" + str(i)
        # create empty data and flag arrays
        variable = pfp_utils.CreateEmptyVariable(all_label, nrecs)
        pfp_utils.CreateSeries(ds_all, all_label, variable["Data"],
                               Flag=variable["Flag"], Attr=variable["Attr"])
        # read the data out of the ISD site data structure
        data, flag, attr = pfp_utils.GetSeriesasMA(ds_out[i], label)
        # add the ISD site ID
        attr["isd_site_id"] = isd_site_id
        # put the data, flag and attributes into the all-in-one data structure
        ds_all.series[all_label]["Data"][idx] = data[idy]
        ds_all.series[all_label]["Flag"][idx] = flag[idy]
        ds_all.series[all_label]["Attr"] = copy.deepcopy(attr)
# do the QC checks
cfg_qc = copy.deepcopy(cfg)
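# Illustrative sketch (not called anywhere in this module): how the
# numpy.intersect1d + numpy.searchsorted pattern above lines up the records
# that two sorted time series have in common.  idx and idy are the positions
# of the shared timestamps in each series, so data[idy] from one series can
# be written into the other at [idx].  Values are made up stand-ins for
# datetimes.
def _example_align_common_timestamps():
    import numpy
    ldt_all = numpy.array([0, 1, 2, 3, 4, 5])
    ldt_one = numpy.array([2, 3, 5, 7])
    common = numpy.intersect1d(ldt_all, ldt_one)
    idx = numpy.searchsorted(ldt_all, common)  # [2, 3, 5]
    idy = numpy.searchsorted(ldt_one, common)  # [0, 1, 2]
    print(idx, idy)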