def calculate_ws_and_wd_from_u_and_v(ds):
    """
    Purpose:
     Derive wind speed ("Ws") and wind direction ("Wd") from the wind
     components "U" and "V" and write both to the data structure.
     Wind direction is masked where the wind speed is below 0.01.
    Usage:
     calculate_ws_and_wd_from_u_and_v(ds)
     where ds is a data structure containing the variables "U" and "V"
    Side effects:
     Creates the variables "Ws" and "Wd" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    U = pfp_utils.GetVariable(ds, "U")
    V = pfp_utils.GetVariable(ds, "V")
    Ws = pfp_utils.CreateEmptyVariable("Ws", nrecs, datetime=U["DateTime"])
    Wd = pfp_utils.CreateEmptyVariable("Wd", nrecs, datetime=U["DateTime"])
    u = U["Data"]
    v = V["Data"]
    # direction is 270 minus the arctan2 angle in degrees, wrapped to 0-360
    angle = numpy.degrees(numpy.ma.arctan2(v, u))
    direction = numpy.ma.mod(float(270) - angle, 360)
    # speed is the magnitude of the (U, V) vector
    speed = numpy.ma.sqrt(u*u + v*v)
    Ws["Data"] = speed
    # direction is not kept when the wind speed is less than 0.01
    Wd["Data"] = numpy.ma.masked_where(speed < 0.01, direction)
    # QC flag is 1 where the data is masked, 0 elsewhere
    for var in (Ws, Wd):
        var["Flag"] = numpy.where(numpy.ma.getmaskarray(var["Data"]),
                                  flag_masked, flag_good)
    # variable attributes
    Ws["Attr"] = {"standard_name": "wind_speed",
                  "long_name": "Wind speed",
                  "units": "m/s",
                  "statistic_type": "average"}
    Wd["Attr"] = {"standard_name": "wind_from_direction",
                  "long_name": "Wind direction",
                  "units": "degrees",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Ws)
    pfp_utils.CreateVariable(ds, Wd)
    return
def gH2Opm3_to_mmolpmol(ds, MF_out, AH_in, Ta_in, ps_in):
    """
    Purpose:
     Calculate H2O mole fraction in mmol/mol from absolute humidity in g/m^3,
     air temperature and pressure.
    Usage:
     pfp_func_units.gH2Opm3_to_mmolpmol(ds, MF_out, AH_in, Ta_in, ps_in)
     Returns 1 on success, 0 if any input series is not in the data structure.
    Author: PRI
    Date: August 2019
    """
    nRecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # give up at the first input series that is missing
    available = ds.root["Variables"].keys()
    missing = [item for item in (AH_in, Ta_in, ps_in) if item not in available]
    if missing:
        logger.error(" Requested series " + missing[0] + " not found, " +
                     MF_out + " not calculated")
        return 0

    def fetch(label, units):
        # read a variable and convert it to the requested units
        return pfp_utils.convert_units_func(
            ds, pfp_utils.GetVariable(ds, label), units)

    AH = fetch(AH_in, "g/m^3")
    Ta = fetch(Ta_in, "degC")
    ps = fetch(ps_in, "kPa")
    # the output variable must already exist in the data structure
    MF = pfp_utils.GetVariable(ds, MF_out)
    MF["Data"] = pfp_mf.h2o_mmolpmolfromgpm3(AH["Data"], Ta["Data"], ps["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    MF["Flag"] = numpy.where(numpy.ma.getmaskarray(MF["Data"]), ones, zeros)
    MF["Attr"]["units"] = "mmol/mol"
    pfp_utils.CreateVariable(ds, MF)
    return 1
def Standard_deviation_from_variance(ds, Sd_out, Vr_in):
    """
    Purpose:
     Function to convert variance to standard deviation.
     The output is a deep copy of the input variable with the square root of
     the data, the label set to Sd_out and the units updated.
    Usage:
     pfp_func_statistics.Standard_deviation_from_variance(ds, Sd_out, Vr_in)
     Returns 1 on success, 0 if the units of Vr_in are not recognised.
    Author: PRI
    Date: October 2020
    """
    # variance units and the matching standard deviation units
    # NOTE(review): "mg/m3" has no caret, unlike the other entries - confirm
    #               this spelling is intended
    sd_units_for = {"mg^2/m^6": "mg/m3",
                    "mmol^2/m^6": "mmol/m^3",
                    "g^2/m^6": "g/m^3",
                    "degC^2": "degC",
                    "K^2": "K",
                    "m^2/s^2": "m/s"}
    vr = pfp_utils.GetVariable(ds, Vr_in)
    units_in = vr["Attr"]["units"]
    if units_in not in sd_units_for:
        for msg in (" Unrecognised units (" + units_in + ") for variable " + Vr_in,
                    " Standard deviation not calculated from variance"):
            logger.error(msg)
        return 0
    # start from a copy so the input variable's attributes and flag carry over
    sd = copy.deepcopy(vr)
    sd["Label"] = Sd_out
    sd["Data"] = numpy.ma.sqrt(vr["Data"])
    sd["Attr"]["units"] = sd_units_for[units_in]
    # only update the statistic type if the input variable had one
    if "statistic_type" in sd["Attr"]:
        sd["Attr"]["statistic_type"] = "standard_deviation"
    pfp_utils.CreateVariable(ds, sd)
    return 1
def Variance_from_standard_deviation(ds, Vr_out, Sd_in):
    """
    Purpose:
     Function to convert standard deviation to variance.
     The output is a deep copy of the input variable with the data squared,
     the label set to Vr_out and the units updated.
    Usage:
     pfp_func_statistics.Variance_from_standard_deviation(ds, Vr_out, Sd_in)
     Returns 1 on success, 0 if the units of Sd_in are not recognised.
    Author: PRI
    Date: October 2020
    """
    # standard deviation units and the matching variance units
    # both "mg/m3" and "mg/m^3" are accepted; the latter is the spelling used
    # by the mgCO2 unit conversion functions and was previously rejected here
    sd_units = {"mg/m3": "mg^2/m^6",
                "mg/m^3": "mg^2/m^6",
                "mmol/m^3": "mmol^2/m^6",
                "g/m^3": "g^2/m^6",
                "degC": "degC^2",
                "K": "K^2",
                "m/s": "m^2/s^2"}
    sd = pfp_utils.GetVariable(ds, Sd_in)
    units_in = sd["Attr"]["units"]
    if units_in not in sd_units:
        msg = " Unrecognised units (" + units_in + ") for variable " + Sd_in
        logger.error(msg)
        msg = " Variance not calculated from standard deviation"
        logger.error(msg)
        return 0
    # start from a copy so the input variable's attributes and flag carry over
    vr = copy.deepcopy(sd)
    vr["Label"] = Vr_out
    vr["Data"] = sd["Data"] * sd["Data"]
    vr["Attr"]["units"] = sd_units[units_in]
    # only update the statistic type if the input variable had one
    if "statistic_type" in vr["Attr"]:
        vr["Attr"]["statistic_type"] = "variance"
    pfp_utils.CreateVariable(ds, vr)
    return 1
def gH2Opm3_to_mmolpm3(ds, H2O_out, AH_in):
    """
    Purpose:
     Calculate H2O molar density in mmol/m^3 from absolute humidity in g/m^3.
     A variance (label ending in "_Vr" with units of g^2/m^6 or gH2O^2/m^6)
     is converted by taking the square root, converting the units and then
     squaring the result.
    Usage:
     pfp_func_units.gH2Opm3_to_mmolpm3(ds, H2O_out, AH_in)
     Returns 1 on success, 0 if AH_in is not in the data structure.
    Author: PRI
    Date: September 2020
    """
    if AH_in not in ds.root["Variables"].keys():
        logger.error(" Requested series " + AH_in + " not found, " +
                     H2O_out + " not calculated")
        return 0
    var_in = pfp_utils.GetVariable(ds, AH_in)
    variance_units = ["g^2/m^6", "gH2O^2/m^6"]
    got_variance = (var_in["Label"][-3:] == "_Vr" and
                    var_in["Attr"]["units"] in variance_units)
    if got_variance:
        # work on the standard deviation so the linear conversion applies
        var_in["Data"] = numpy.ma.sqrt(var_in["Data"])
        var_in["Attr"]["units"] = "g/m^3"
    var_out = pfp_utils.convert_units_func(ds, var_in, "mmol/m^3", mode="quiet")
    var_out["Label"] = H2O_out
    if got_variance:
        # back to a variance in the new units
        var_out["Data"] = var_out["Data"] * var_out["Data"]
        var_out["Attr"]["units"] = "mmol^2/m^6"
    pfp_utils.CreateVariable(ds, var_out)
    return 1
def gfMDS_mask_long_gaps(ds, mds_label, l5_info, called_by):
    """
    Purpose:
     Mask gaps that are longer than a specified maximum length.
     Wherever the target variable has a run of masked values longer than
     "MaxShortGapRecords", the data and flag of the MDS variable over that
     run are replaced with those of the target.
     Nothing is done if "MaxShortGapRecords" is not in
     l5_info[called_by]["info"].
    Usage:
     gfMDS_mask_long_gaps(ds, mds_label, l5_info, called_by)
    Side effects:
     The variable mds_label in ds is overwritten.
    Author: PRI
    Date: June 2019
    """
    info = l5_info[called_by]["info"]
    if "MaxShortGapRecords" not in info:
        return
    logger.info(" Masking gaps longer than " + str(info["MaxShortGapDays"]) + " days")
    target_label = l5_info[called_by]["outputs"][mds_label]["target"]
    target = pfp_utils.GetVariable(ds, target_label)
    variable = pfp_utils.GetVariable(ds, mds_label)
    gap_mask = numpy.ma.getmaskarray(target["Data"])
    longest_short_gap = info["MaxShortGapRecords"]
    # start and stop indices of contiguous masked blocks in the target
    for start, stop in pfp_utils.contiguous_regions(gap_mask):
        if stop - start <= longest_short_gap:
            # short gap, keep the gap filled values
            continue
        gap = slice(start, stop)
        variable["Data"][gap] = target["Data"][gap]
        variable["Flag"][gap] = target["Flag"][gap]
    # put the modified variable back into the data structure
    pfp_utils.CreateVariable(ds, variable)
    return
def percent_to_mmolpmol(ds, MF_out, RH_in, Ta_in, ps_in):
    """
    Purpose:
     Calculate H2O mole fraction from relative humidity (RH).
     The relative humidity is first converted to absolute humidity which is
     then converted to a mole fraction in mmol/mol.
    Usage:
     percent_to_mmolpmol(ds, MF_out, RH_in, Ta_in, ps_in)
     Returns 1 on success, 0 if any input series is not in the data structure.
    """
    nRecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # give up at the first input series that is missing
    available = list(ds.root["Variables"].keys())
    missing = [item for item in (RH_in, Ta_in, ps_in) if item not in available]
    if missing:
        logger.error(" Requested series " + missing[0] + " not found, " +
                     MF_out + " not calculated")
        return 0

    def fetch(label, units):
        # read a variable and convert it to the requested units
        return pfp_utils.convert_units_func(
            ds, pfp_utils.GetVariable(ds, label), units)

    # relative humidity and air temperature in the expected units
    RH = fetch(RH_in, "percent")
    Ta = fetch(Ta_in, "degC")
    # absolute humidity from relative humidity
    AH_data = pfp_mf.absolutehumidityfromrelativehumidity(Ta["Data"], RH["Data"])
    # atmospheric pressure in the expected units
    ps = fetch(ps_in, "kPa")
    # the output variable must already exist in the data structure
    MF = pfp_utils.GetVariable(ds, MF_out)
    MF["Data"] = pfp_mf.h2o_mmolpmolfromgpm3(AH_data, Ta["Data"], ps["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    MF["Flag"] = numpy.where(numpy.ma.getmaskarray(MF["Data"]), ones, zeros)
    MF["Attr"]["units"] = "mmol/mol"
    # put the output variable back into the data structure
    pfp_utils.CreateVariable(ds, MF)
    return 1
def mmolpm3_to_gH2Opm3(ds, AH_out, H2O_in):
    """
    Purpose:
     Function to convert mmol/m^3 (molar density) to g/m^3 (mass density).
     A variance (label ending in "_Vr" with units of mmol^2/m^6) is converted
     by taking the square root, converting the units and then squaring the
     result.
    Usage:
     pfp_func_units.mmolpm3_to_gH2Opm3(ds, AH_out, H2O_in)
     Returns 1 on success, 0 if H2O_in is not in the data structure.
    Author: PRI
    Date: August 2020
    """
    if H2O_in not in list(ds.root["Variables"].keys()):
        logger.error(" Requested series " + H2O_in + " not found, " +
                     AH_out + " not calculated")
        return 0
    var_in = pfp_utils.GetVariable(ds, H2O_in)
    got_variance = (var_in["Label"][-3:] == "_Vr" and
                    var_in["Attr"]["units"] == "mmol^2/m^6")
    if got_variance:
        # work on the standard deviation so the linear conversion applies
        var_in["Data"] = numpy.ma.sqrt(var_in["Data"])
        var_in["Attr"]["units"] = "mmol/m^3"
    var_out = pfp_utils.convert_units_func(ds, var_in, "g/m^3", mode="quiet")
    var_out["Label"] = AH_out
    if got_variance:
        # back to a variance in the new units
        var_out["Data"] = var_out["Data"] * var_out["Data"]
        var_out["Attr"]["units"] = "g^2/m^6"
    pfp_utils.CreateVariable(ds, var_out)
    return 1
def gH2Opm3_to_percent(ds, RH_out, AH_in, Ta_in):
    """
    Purpose:
     Function to convert absolute humidity in units of g/m^3 to relative
     humidity in percent.
     The inputs are converted to g/m^3 and degC before the calculation, in
     the same way as the other humidity conversion functions, so that inputs
     in other units do not silently produce a wrong relative humidity.
    Usage:
     pfp_func_units.gH2Opm3_to_percent(ds, RH_out, AH_in, Ta_in)
     Returns 1 on success, 0 if any input series is not in the data structure.
    Author: PRI
    Date: September 2020
    """
    nRecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    for item in [AH_in, Ta_in]:
        if item not in ds.root["Variables"].keys():
            msg = " Requested series " + item + " not found, " + RH_out + " not calculated"
            logger.error(msg)
            return 0
    # get the absolute humidity and check the units
    AH = pfp_utils.GetVariable(ds, AH_in)
    AH = pfp_utils.convert_units_func(ds, AH, "g/m^3")
    # get the temperature and check the units
    Ta = pfp_utils.GetVariable(ds, Ta_in)
    Ta = pfp_utils.convert_units_func(ds, Ta, "degC")
    # the output variable must already exist in the data structure
    RH = pfp_utils.GetVariable(ds, RH_out)
    RH["Data"] = pfp_mf.relativehumidityfromabsolutehumidity(AH["Data"], Ta["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    RH["Flag"] = numpy.where(numpy.ma.getmaskarray(RH["Data"]), ones, zeros)
    RH["Attr"]["units"] = "percent"
    pfp_utils.CreateVariable(ds, RH)
    return 1
def mgCO2pm3_to_umolpmol(ds, MF_out, CO2_in, Ta_in, ps_in):
    """
    Purpose:
     Calculate CO2 mole fraction in umol/mol from mass density in mgCO2/m3,
     air temperature and pressure.
    Usage:
     pfp_func_units.mgCO2pm3_to_umolpmol(ds, MF_out, CO2_in, Ta_in, ps_in)
     Returns 1 on success, 0 if any input series is not in the data structure.
    Author: PRI
    Date: August 2019
    """
    nRecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # give up at the first input series that is missing
    available = ds.root["Variables"].keys()
    missing = [item for item in (CO2_in, Ta_in, ps_in) if item not in available]
    if missing:
        logger.error(" Requested series " + missing[0] + " not found, " +
                     MF_out + " not calculated")
        return 0

    def fetch(label, units):
        # read a variable and convert it to the requested units
        return pfp_utils.convert_units_func(
            ds, pfp_utils.GetVariable(ds, label), units)

    CO2 = fetch(CO2_in, "mg/m^3")
    Ta = fetch(Ta_in, "degC")
    ps = fetch(ps_in, "kPa")
    # the output variable must already exist in the data structure
    MF = pfp_utils.GetVariable(ds, MF_out)
    MF["Data"] = pfp_mf.co2_ppmfrommgCO2pm3(CO2["Data"], Ta["Data"], ps["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    MF["Flag"] = numpy.where(numpy.ma.getmaskarray(MF["Data"]), ones, zeros)
    MF["Attr"]["units"] = "umol/mol"
    pfp_utils.CreateVariable(ds, MF)
    return 1
def mgCO2pm3_to_mmolpm3(ds, CO2_out, CO2_in):
    """
    Purpose:
     Calculate CO2 molar density in mmol/m3 from CO2 concentration in mg/m3.
     A variance (label ending in "_Vr" with units of mg^2/m^6 or mgCO2^2/m^6)
     is converted by taking the square root, converting the units and then
     squaring the result.
    Usage:
     pfp_func_units.mgCO2pm3_to_mmolpm3(ds, CO2_out, CO2_in)
     Returns 1 on success, 0 if CO2_in is not in the data structure.
    Author: PRI
    Date: September 2020
    """
    if CO2_in not in ds.root["Variables"].keys():
        logger.error(" Requested series " + CO2_in + " not found, " +
                     CO2_out + " not calculated")
        return 0
    var_in = pfp_utils.GetVariable(ds, CO2_in)
    variance_units = ["mg^2/m^6", "mgCO2^2/m^6"]
    got_variance = (var_in["Label"][-3:] == "_Vr" and
                    var_in["Attr"]["units"] in variance_units)
    if got_variance:
        # work on the standard deviation so the linear conversion applies
        var_in["Data"] = numpy.ma.sqrt(var_in["Data"])
        var_in["Attr"]["units"] = "mg/m^3"
    var_out = pfp_utils.convert_units_func(ds, var_in, "mmol/m^3", mode="quiet")
    var_out["Label"] = CO2_out
    if got_variance:
        # back to a variance in the new units
        var_out["Data"] = var_out["Data"] * var_out["Data"]
        var_out["Attr"]["units"] = "mmol^2/m^6"
    pfp_utils.CreateVariable(ds, var_out)
    return 1
def remove_duplicates(ds):
    """
    Remove duplicate timestamps, similar to Peter's solution in
    pfp_ts.py MergeDataStructures at L1

    Every variable in ds.series (including "DateTime") is rebuilt from the
    records at the first occurrence of each unique timestamp.  The indices
    come from numpy.unique so the output records are in sorted timestamp
    order.  Returns the modified data structure.
    """
    dtn = pfp_utils.GetVariable(ds, "DateTime")
    # unique timestamps and the index of the first occurrence of each
    dtn_unique, index_unique = numpy.unique(dtn["Data"], return_index=True)
    nrecs = len(dtn_unique)
    # tell the user if any duplicates were found
    n_duplicates = len(dtn["Data"]) - nrecs
    if n_duplicates > 0:
        logger.warning(str(n_duplicates) +
                       " duplicate time stamps were removed for isd site ")
    # rebuild every variable using only the unique records
    for label in list(ds.series.keys()):
        var1 = pfp_utils.CreateEmptyVariable(label, nrecs)
        varn = pfp_utils.GetVariable(ds, label)
        for key in ("Data", "Flag"):
            var1[key] = varn[key][index_unique]
        var1["Attr"] = varn["Attr"]
        pfp_utils.CreateVariable(ds, var1)
    return ds
def percent_to_m3pm3(ds, Sws_out, Sws_in):
    """
    Purpose:
     Function to convert Sws from units of "percent" to units of "m^3/m^3".
     The converted variable is written to the data structure as Sws_out.
    Usage:
     pfp_func_units.percent_to_m3pm3(ds, Sws_out, Sws_in)
     Always returns 1.
    Author: PRI
    Date: April 2020
    """
    converted = pfp_utils.convert_units_func(
        ds, pfp_utils.GetVariable(ds, Sws_in), "m^3/m^3", mode="quiet")
    converted["Label"] = Sws_out
    pfp_utils.CreateVariable(ds, converted)
    return 1
def Pa_to_kPa(ds, ps_out, ps_in):
    """
    Purpose:
     Function to convert pressure from Pa to kPa.
     The converted variable is written to the data structure as ps_out.
    Usage:
     pfp_func_units.Pa_to_kPa(ds, ps_out, ps_in)
     Always returns 1.
    Author: PRI
    Date: February 2018
    """
    converted = pfp_utils.convert_units_func(
        ds, pfp_utils.GetVariable(ds, ps_in), "kPa", mode="quiet")
    converted["Label"] = ps_out
    pfp_utils.CreateVariable(ds, converted)
    return 1
def kgpm3_to_gpm3(ds, AH_out, AH_in):
    """
    Purpose:
     Function to convert absolute humidity from kg/m^3 to g/m^3.
     The converted variable is written to the data structure as AH_out.
    Usage:
     pfp_func_units.kgpm3_to_gpm3(ds, AH_out, AH_in)
     Always returns 1.
    Author: PRI
    Date: August 2020
    """
    converted = pfp_utils.convert_units_func(
        ds, pfp_utils.GetVariable(ds, AH_in), "g/m^3", mode="quiet")
    converted["Label"] = AH_out
    pfp_utils.CreateVariable(ds, converted)
    return 1
def calculate_available_energy(ds):
    """
    Purpose:
     Calculate the available energy ("Fa") as the sum of "Fh" and "Fe" and
     write it to the data structure.
    Usage:
     calculate_available_energy(ds)
     where ds is a data structure containing the variables "Fh" and "Fe"
    Side effects:
     Creates the variable "Fa" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    Fh = pfp_utils.GetVariable(ds, "Fh")
    Fe = pfp_utils.GetVariable(ds, "Fe")
    Fa = pfp_utils.CreateEmptyVariable("Fa", nrecs, datetime=Fh["DateTime"])
    Fa["Data"] = Fh["Data"] + Fe["Data"]
    # flag is 1 where the sum is masked, 0 elsewhere
    is_masked = numpy.ma.getmaskarray(Fa["Data"])
    Fa["Flag"] = numpy.where(is_masked, flag_masked, flag_good)
    Fa["Attr"] = {"long_name": "Available energy",
                  "units": "W/m^2",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Fa)
    return
def fraction_to_percent(ds, RH_out, RH_in):
    """
    Purpose:
     Function to convert RH from its current units to units of "percent".
     The converted variable is written to the data structure as RH_out.
    Usage:
     pfp_func_units.fraction_to_percent(ds, RH_out, RH_in)
     Always returns 1.
    Author: PRI
    Date: August 2019
    """
    converted = pfp_utils.convert_units_func(
        ds, pfp_utils.GetVariable(ds, RH_in), "percent", mode="quiet")
    converted["Label"] = RH_out
    pfp_utils.CreateVariable(ds, converted)
    return 1
def calculate_upwelling_longwave_radiation(ds):
    """
    Purpose:
     Calculate the up-welling longwave radiation ("Flu") as the difference
     between the down-welling ("Fld") and net ("Fnlw") longwave radiation and
     write it to the data structure.
    Usage:
     calculate_upwelling_longwave_radiation(ds)
     where ds is a data structure containing the variables "Fld" and "Fnlw"
    Side effects:
     Creates the variable "Flu" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Fld = pfp_utils.GetVariable(ds, "Fld")
    Fnlw = pfp_utils.GetVariable(ds, "Fnlw")
    Flu = pfp_utils.CreateEmptyVariable("Flu", nrecs, datetime=Fld["DateTime"])
    Flu["Data"] = Fld["Data"] - Fnlw["Data"]
    # the flag is taken from the mask of the result, not of Fld alone, so that
    # records where only Fnlw is missing are also flagged
    Flu["Flag"] = numpy.where(numpy.ma.getmaskarray(Flu["Data"]), ones, zeros)
    Flu["Attr"] = {"standard_name": "surface_upwelling_longwave_flux_in_air",
                   "long_name": "Up-welling longwave radiation",
                   "units": "W/m^2",
                   "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Flu)
    return
def calculate_net_radiation(ds):
    """
    Purpose:
     Calculate the net radiation ("Fn") as the sum of the net shortwave
     ("Fnsw") and net longwave ("Fnlw") radiation and write it to the data
     structure.
    Usage:
     calculate_net_radiation(ds)
     where ds is a data structure containing the variables "Fnsw" and "Fnlw"
    Side effects:
     Creates the variable "Fn" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    Fnsw = pfp_utils.GetVariable(ds, "Fnsw")
    Fnlw = pfp_utils.GetVariable(ds, "Fnlw")
    Fn = pfp_utils.CreateEmptyVariable("Fn", nrecs, datetime=Fnsw["DateTime"])
    Fn["Data"] = Fnsw["Data"] + Fnlw["Data"]
    # flag is 1 where the sum is masked, 0 elsewhere
    Fn["Flag"] = numpy.where(numpy.ma.getmaskarray(Fn["Data"]), ones, zeros)
    # standard name spelling corrected ("downward", was "downwawrd")
    Fn["Attr"] = {"standard_name": "surface_net_downward_radiative_flux",
                  "long_name": "Net radiation",
                  "units": "W/m^2",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Fn)
    return
def calculate_specific_humidity(ds):
    """
    Purpose:
     Calculate the specific humidity ("SH") from relative humidity ("RH"),
     air temperature ("Ta") and pressure ("ps") and write it to the data
     structure.
    Usage:
     calculate_specific_humidity(ds)
     where ds is a data structure containing the variables "Ta", "ps" and "RH"
    Side effects:
     Creates the variable "SH" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    Ta = pfp_utils.GetVariable(ds, "Ta")
    ps = pfp_utils.GetVariable(ds, "ps")
    RH = pfp_utils.GetVariable(ds, "RH")
    SH = pfp_utils.CreateEmptyVariable("SH", nrecs, datetime=RH["DateTime"])
    SH["Data"] = pfp_mf.specifichumidityfromrelativehumidity(
        RH["Data"], Ta["Data"], ps["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    is_masked = numpy.ma.getmaskarray(SH["Data"])
    SH["Flag"] = numpy.where(is_masked, flag_masked, flag_good)
    SH["Attr"] = {"standard_name": "specific_humidity",
                  "long_name": "Specific humidity",
                  "units": "kg/kg",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, SH)
    return
def calculate_absolute_humidity(ds):
    """
    Purpose:
     Calculate the absolute humidity ("AH") from relative humidity ("RH") and
     air temperature ("Ta") and write it to the data structure.
    Usage:
     calculate_absolute_humidity(ds)
     where ds is a data structure containing the variables "RH" and "Ta"
    Side effects:
     Creates the variable "AH" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    RH = pfp_utils.GetVariable(ds, "RH")
    Ta = pfp_utils.GetVariable(ds, "Ta")
    AH = pfp_utils.CreateEmptyVariable("AH", nrecs, datetime=RH["DateTime"])
    AH["Data"] = pfp_mf.absolutehumidityfromrelativehumidity(
        Ta["Data"], RH["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    is_masked = numpy.ma.getmaskarray(AH["Data"])
    AH["Flag"] = numpy.where(is_masked, flag_masked, flag_good)
    AH["Attr"] = {"standard_name": "mass_concentration_of_water_vapor_in_air",
                  "long_name": "Absolute humidity",
                  "units": "g/m^3",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, AH)
    return
def calculate_relative_humidity(ds):
    """
    Purpose:
     Calculate the relative humidity ("RH") from dew point temperature ("Td")
     and air temperature ("Ta") and write it to the data structure.
    Usage:
     calculate_relative_humidity(ds)
     where ds is a data structure containing the variables "Td" and "Ta"
    Side effects:
     Creates the variable "RH" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    Td = pfp_utils.GetVariable(ds, "Td")
    Ta = pfp_utils.GetVariable(ds, "Ta")
    RH = pfp_utils.CreateEmptyVariable("RH", nrecs, datetime=Td["DateTime"])
    RH["Data"] = pfp_mf.relativehumidityfromdewpoint(Td["Data"], Ta["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    is_masked = numpy.ma.getmaskarray(RH["Data"])
    RH["Flag"] = numpy.where(is_masked, flag_masked, flag_good)
    RH["Attr"] = {"standard_name": "relative_humidity",
                  "long_name": "Relative humidity",
                  "units": "percent",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, RH)
    return
def calculate_ground_heat_flux(ds):
    """
    Purpose:
     Calculate the ground heat flux ("Fg") as the residual of net radiation
     ("Fn") minus available energy ("Fa") and write it to the data structure.
    Usage:
     calculate_ground_heat_flux(ds)
     where ds is a data structure containing the variables "Fn" and "Fa"
    Side effects:
     Creates the variable "Fg" in ds.
    """
    # NOTE(review): a second function with this name appears later in this
    #               source; if both live in the same module the later
    #               definition replaces this one - confirm which is intended
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    Fn = pfp_utils.GetVariable(ds, "Fn")
    Fa = pfp_utils.GetVariable(ds, "Fa")
    Fg = pfp_utils.CreateEmptyVariable("Fg", nrecs, datetime=Fn["DateTime"])
    Fg["Data"] = Fn["Data"] - Fa["Data"]
    # flag is 1 where the residual is masked, 0 elsewhere
    is_masked = numpy.ma.getmaskarray(Fg["Data"])
    Fg["Flag"] = numpy.where(is_masked, flag_masked, flag_good)
    Fg["Attr"] = {"standard_name": "downward_heat_flux_in_soil",
                  "long_name": "Ground heat flux",
                  "units": "W/m^2",
                  "statistic_type": "average"}
    pfp_utils.CreateVariable(ds, Fg)
    return
def calculate_ground_heat_flux(ds):
    """
    Purpose:
     Calculate the ground heat flux as the residual of net radiation minus
     available energy for each of the 9 variables "Fn_ij" and "Fa_ij" where
     i and j run from 0 to 2.
    Usage:
     calculate_ground_heat_flux(ds)
     where ds is a data structure containing the variables "Fn_ij" and "Fa_ij"
    Side effects:
     Creates the variables "Fg_00" to "Fg_22" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    flag_good = numpy.zeros(nrecs)
    flag_masked = numpy.ones(nrecs)
    # suffixes "_00", "_01", ... "_22" in row-major order
    suffixes = ["_" + str(i) + str(j) for i in range(3) for j in range(3)]
    for suffix in suffixes:
        Fn = pfp_utils.GetVariable(ds, "Fn" + suffix)
        Fa = pfp_utils.GetVariable(ds, "Fa" + suffix)
        Fg = pfp_utils.CreateEmptyVariable("Fg" + suffix, nrecs,
                                           datetime=Fn["DateTime"])
        Fg["Data"] = Fn["Data"] - Fa["Data"]
        # flag is 1 where the residual is masked, 0 elsewhere
        is_masked = numpy.ma.getmaskarray(Fg["Data"])
        Fg["Flag"] = numpy.where(is_masked, flag_masked, flag_good)
        Fg["Attr"] = {"standard_name": "downward_heat_flux_in_soil",
                      "long_name": "Ground heat flux",
                      "units": "W/m^2",
                      "statistic_type": "average"}
        pfp_utils.CreateVariable(ds, Fg)
    return
def calculate_upwelling_shortwave_radiation(ds):
    """
    Purpose:
     Calculate the up-welling shortwave radiation as the difference between
     the down-welling ("Fsd_ij") and net ("Fnsw_ij") shortwave radiation for
     each of the 9 variables where i and j run from 0 to 2.
    Usage:
     calculate_upwelling_shortwave_radiation(ds)
     where ds is a data structure containing the variables "Fsd_ij" and
     "Fnsw_ij"
    Side effects:
     Creates the variables "Fsu_00" to "Fsu_22" in ds.
    """
    nrecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nrecs)
    ones = numpy.ones(nrecs)
    for i in range(3):
        for j in range(3):
            suffix = "_" + str(i) + str(j)
            Fsd = pfp_utils.GetVariable(ds, "Fsd" + suffix)
            Fnsw = pfp_utils.GetVariable(ds, "Fnsw" + suffix)
            Fsu = pfp_utils.CreateEmptyVariable("Fsu" + suffix, nrecs,
                                                datetime=Fsd["DateTime"])
            Fsu["Data"] = Fsd["Data"] - Fnsw["Data"]
            # the flag is taken from the mask of the result, not of Fsd alone,
            # so that records where only Fnsw is missing are also flagged
            Fsu["Flag"] = numpy.where(numpy.ma.getmaskarray(Fsu["Data"]),
                                      ones, zeros)
            Fsu["Attr"] = {"standard_name": "surface_upwelling_shortwave_flux_in_air",
                           "long_name": "Up-welling shortwave radiation",
                           "units": "W/m^2",
                           "statistic_type": "average"}
            pfp_utils.CreateVariable(ds, Fsu)
    return
def percent_to_gH2Opm3(ds, AH_out, RH_in, Ta_in):
    """
    Purpose:
     Function to calculate absolute humidity given relative humidity and
     air temperature.  Absolute humidity is not calculated if any of the
     input series are missing.  The result is written to the variable AH_out
     which is read from the data structure and so must already exist.
    Usage:
     pfp_func_units.percent_to_gH2Opm3(ds,"AH_HMP_2m","RH_HMP_2m","Ta_HMP_2m")
     Returns 1 on success, 0 if any input series is not in the data structure.
    Author: PRI
    Date: September 2015
    """
    nRecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # give up at the first input series that is missing
    available = ds.root["Variables"].keys()
    missing = [item for item in (RH_in, Ta_in) if item not in available]
    if missing:
        logger.error(" Requested series " + missing[0] + " not found, " +
                     AH_out + " not calculated")
        return 0

    def fetch(label, units):
        # read a variable and convert it to the requested units
        return pfp_utils.convert_units_func(
            ds, pfp_utils.GetVariable(ds, label), units)

    # relative humidity and air temperature in the expected units
    RH = fetch(RH_in, "percent")
    Ta = fetch(Ta_in, "degC")
    # calculate the absolute humidity
    AH = pfp_utils.GetVariable(ds, AH_out)
    AH["Data"] = pfp_mf.absolutehumidityfromrelativehumidity(Ta["Data"], RH["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    AH["Flag"] = numpy.where(numpy.ma.getmaskarray(AH["Data"]), ones, zeros)
    AH["Attr"]["units"] = "g/m^3"
    pfp_utils.CreateVariable(ds, AH)
    return 1
def mmolpmol_to_gH2Opm3(ds, AH_out, MF_in, Ta_in, ps_in):
    """
    Purpose:
     Function to calculate absolute humidity given the water vapour mole
     fraction, air temperature and pressure.  Absolute humidity is not
     calculated if any of the input series are missing.  The result is
     written to the variable AH_out which is read from the data structure
     and so must already exist.
    Usage:
     pfp_func_units.mmolpmol_to_gH2Opm3(ds,"AH_IRGA_Av","H2O_IRGA_Av","Ta_HMP_2m","ps")
     Returns 1 on success, 0 if any input series is not in the data structure.
    Author: PRI
    Date: September 2015
    """
    nRecs = int(ds.root["Attributes"]["nc_nrecs"])
    zeros = numpy.zeros(nRecs, dtype=numpy.int32)
    ones = numpy.ones(nRecs, dtype=numpy.int32)
    # give up at the first input series that is missing
    available = list(ds.root["Variables"].keys())
    missing = [item for item in (MF_in, Ta_in, ps_in) if item not in available]
    if missing:
        logger.error(" Requested series " + missing[0] + " not found, " +
                     AH_out + " not calculated")
        return 0

    def fetch(label, units):
        # read a variable and convert it to the requested units
        return pfp_utils.convert_units_func(
            ds, pfp_utils.GetVariable(ds, label), units)

    MF = fetch(MF_in, "mmol/mol")
    Ta = fetch(Ta_in, "degC")
    ps = fetch(ps_in, "kPa")
    # calculate the absolute humidity
    AH = pfp_utils.GetVariable(ds, AH_out)
    AH["Data"] = pfp_mf.h2o_gpm3frommmolpmol(MF["Data"], Ta["Data"], ps["Data"])
    # flag is 1 where the result is masked, 0 elsewhere
    AH["Flag"] = numpy.where(numpy.ma.getmaskarray(AH["Data"]), ones, zeros)
    AH["Attr"]["units"] = "g/m^3"
    pfp_utils.CreateVariable(ds, AH)
    return 1
def K_to_C(ds, T_out, T_in):
    """
    Purpose:
     Function to convert temperature from K to C.
     The converted variable is written to the data structure as T_out.
    Usage:
     pfp_func_units.K_to_C(ds, T_out, T_in)
     Returns 1 on success, 0 if T_in is not in the data structure or if
     T_out contains "<" or ">".
    Author: PRI
    Date: February 2018
    """
    if T_in not in list(ds.root["Variables"].keys()):
        logger.warning(" Convert_K_to_C: variable " + T_in + " not found, skipping ...")
        return 0
    # reject output names containing angle brackets
    if any(bracket in T_out for bracket in ("<", ">")):
        banner = " ***"
        logger.warning(banner)
        logger.warning(" *** " + T_in + ": illegal name (" + T_out +
                       ") in function, skipping ...")
        logger.warning(banner)
        return 0
    converted = pfp_utils.convert_units_func(
        ds, pfp_utils.GetVariable(ds, T_in), "degC", mode="quiet")
    converted["Label"] = T_out
    pfp_utils.CreateVariable(ds, converted)
    return 1
def read_isd_file_csv(isd_file_path):
    """
    Purpose:
     Reads a NOAA ISD CSV file downloaded from
     https://www.ncei.noaa.gov/data/global-hourly/access/

     These files used to be field formatted ASCII where the character
     position in a line of ASCII determined the data type.  Some time in
     2020 or 2021, the old FFA format was replaced with CSV.

     The format of the old-style .gz files is described in
     https://www.ncei.noaa.gov/data/global-hourly/doc/isd-format-document.pdf
     This document still describes the data in the new CSV format.
    Usage:
     ds = read_isd_file_csv(isd_file_path)
     where isd_file_path is the path of the ISD CSV file
    Side effects:
     Returns a PFP data structure with the data at the site time step.
     Raises ValueError if duplicate dates can not be removed or if the time
     step is not 10, 30, 60 or 180 minutes.
    Author: PRI
    Date: July 2021
    """
    msg = " Reading " + isd_file_path
    logger.info(msg)
    # list of variables to read from the CSV file
    csv_labels = ["STATION", "DATE", "LATITUDE", "LONGITUDE", "ELEVATION",
                  "REPORT_TYPE", "QUALITY_CONTROL", "WND", "TMP", "DEW", "SLP",
                  "AA1", "AA2", "AA3", "AA4"]
    # read the CSV file
    df = pandas.read_csv(isd_file_path, delimiter=",", header=0)
    # remove items from csv_labels that are not in the data frame
    df_labels = df.columns.to_list()
    csv_labels = [csv_label for csv_label in csv_labels if csv_label in df_labels]
    # keep only what we need; take a copy so that the column assignments below
    # act on an independent data frame and not on a view of the original
    df = df[csv_labels].copy()
    # remove duplicate dates, keep the SYNOP (FM-12) reports
    # first, we find the duplicate dates
    df["Duplicates"] = df["DATE"].duplicated()
    # next, we drop rows with duplicate dates that are not SYNOP reports
    df = df.drop(df[(df["Duplicates"]) & (df["REPORT_TYPE"] != "FM-12")].index)
    # then check for duplicates again
    df["Duplicates"] = df["DATE"].duplicated()
    if df["Duplicates"].sum() != 0:
        msg = " Unable to remove all duplicate dates in files"
        logger.error(msg)
        raise ValueError(msg)
    # convert the date in the CSV file to a pandas datetime
    df["TIMESTAMP"] = pandas.to_datetime(df["DATE"].astype("string"), errors="raise")
    # find all of the timestamps (should only be 1)
    timestamps = list(df.select_dtypes(include=['datetime64']))
    # take the first if more than 1
    timestamp = timestamps[0]
    # use the timestamp as the index, rounded to the nearest second
    df.set_index(timestamp, inplace=True)
    df.index = df.index.round('1s')
    # wind direction field, see isd_format_document.pdf for details
    wind = df["WND"].str.split(',', expand=True)
    df["Wd"] = wind[0].apply(pandas.to_numeric, errors='coerce')
    df["Ws"] = wind[3].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["WND"]
    # air temperature
    temperature = df["TMP"].str.split(',', expand=True)
    df["Ta"] = temperature[0].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["TMP"]
    # dew point temperature
    dew_point = df["DEW"].str.split(',', expand=True)
    df["Td"] = dew_point[0].apply(pandas.to_numeric, errors='coerce') / float(10)
    del df["DEW"]
    # surface pressure, the field is divided by 100 to give kPa
    surface_pressure = df["SLP"].str.split(',', expand=True)
    df["ps"] = surface_pressure[0].apply(pandas.to_numeric, errors='coerce') / float(100)
    del df["SLP"]
    # Precipitation is stored in columns AA1 to AA4 but not all columns will be present
    #
    # Within each column, precipitation is stored as "HH,PPPP,C,Q" where HH is the
    # period over which the precipitation was accumulated (e.g. 1, 3, 6, 24 hours),
    # PPPP is the precipitation amount in mm*10, C is the condition code and Q is
    # the QC flag (1 = passed all QC checks)
    #
    # Column AA1 contains most of the precipitation data.  When precipitation data is
    # available for 2 accumulation periods e.g. 3 hours and 6 or 24 hours, the second
    # accumulation period is given in AA2.  And so on for up to 4 separate accumulation
    # periods e.g. 1 hour, 3 hours, 6 hours and 24 hours.
    #
    accumulation_periods = [1, 3, 6, 24]
    precip_parts = ["Period", "Amount", "Condition", "Quality"]
    # get a list of the precipitation columns in the data frame
    precip_labels = [l for l in df.columns.to_list() if "AA" in l]
    # create a data frame for the precipitation data, same index as main data frame
    df_precip = pandas.DataFrame(index=df.index)
    # loop over the precipitation fields
    for precip_label in precip_labels:
        # split the "HH,PPPP,C,Q" fields to get individual parts
        tmp = df[precip_label].str.split(',', expand=True)
        # name the columns
        tmp.columns = precip_parts
        # coerce to numeric values
        tmp = tmp.apply(pandas.to_numeric, errors='coerce')
        # loop over the accumulation periods
        for n in accumulation_periods:
            # get the data for this accumulation period and store in a new column
            # e.g. "3_hourly_AA1"
            tmp.loc[(tmp["Period"] == n) & (tmp["Quality"] == 1),
                    str(n) + "_hourly_" + precip_label] = tmp["Amount"]
        # drop the intermediate columns, no longer needed
        tmp = tmp.drop(precip_parts, axis=1)
        # concatenate the new data
        df_precip = pandas.concat([df_precip, tmp], axis=1)
        # drop the individual columns e.g. AA1, AA2 etc
        df.drop(precip_label, axis=1, inplace=True)
    # now loop over the accumulation periods and combine to get a single column
    # for each accumulation period
    for n in accumulation_periods:
        # list of column headings for this accumulation period
        label = str(n) + "_hourly"
        hour_labels = [l for l in df_precip.columns.to_list() if label in l]
        # rename the first column e.g. "3_hourly_AA1" to "3_hourly"
        df_precip.rename({hour_labels[0]: label}, axis=1, inplace=True)
        # loop over the remaining columns and merge into a single column for this
        # accumulation period
        for hour_label in hour_labels[1:]:
            # merge "3_hourly" with "3_hourly_AA2" etc
            df_precip[label] = df_precip[label].combine_first(df_precip[hour_label])
            # delete columns that are no longer needed
            df_precip.drop(hour_label, axis=1, inplace=True)
        # convert mm*10 to mm; this is done exactly once per accumulation period
        # after all columns have been merged (doing it inside the merge loop
        # skipped the conversion when only one column was present and repeated
        # it when more than two were present)
        df_precip[label] = df_precip[label] / float(10)
    # print the sum of the 1, 3, 6 and 24 hourly accumulation periods (we expect them to
    # be equal)
    for n in accumulation_periods:
        total = round(df_precip[str(n) + "_hourly"].sum(), 4)
        msg = " " + str(n) + " hourly precipitation total is " + str(total)
        logger.info(msg)
    # choose the most common accumulation period
    precip_choice = df_precip.count().idxmax()
    msg = " Using " + precip_choice + " for precipitation"
    logger.info(msg)
    # and use it for the precipitation data
    df["Precip"] = df_precip[precip_choice]
    # now copy the data from a pandas data frame to a PFP data structure
    nrecs = len(df)
    ones = numpy.ones(nrecs)
    zeros = numpy.zeros(nrecs)
    # create a data structure
    ds_its = pfp_io.DataStructure()
    # set the global attributes from the first record; .iloc is used because the
    # data frame is indexed by timestamp so [0] would not be a label
    ds_its.globalattributes["nc_nrecs"] = nrecs
    ds_its.globalattributes["altitude"] = float(df["ELEVATION"].iloc[0])
    ds_its.globalattributes["latitude"] = float(df["LATITUDE"].iloc[0])
    ds_its.globalattributes["longitude"] = float(df["LONGITUDE"].iloc[0])
    ds_its.globalattributes["isd_site_id"] = int(df["STATION"].iloc[0])
    # get the datetime variable
    ldt = pfp_utils.CreateEmptyVariable("DateTime", nrecs)
    ldt["Data"] = numpy.array(df.index.to_pydatetime())
    ldt["Flag"] = zeros
    ldt["Attr"] = {"long_name": "Datetime in UTC", "units": ""}
    pfp_utils.CreateVariable(ds_its, ldt)
    # get the time step as the most common interval in minutes; atleast_1d copes
    # with scipy.stats.mode returning either a length 1 array or a scalar
    dt = pfp_utils.get_timestep(ds_its)
    time_step = int(numpy.atleast_1d(scipy.stats.mode(dt / float(60))[0])[0])
    if time_step not in [10, 30, 60, 180]:
        msg = " Time step (" + str(time_step) + ") must be 10, 30, 60 or 180 minutes"
        logger.error(msg)
        raise ValueError(msg)
    ds_its.globalattributes["time_step"] = time_step

    def add_variable(label, data, attr):
        # create a variable, flag the masked records and write it to ds_its
        var = pfp_utils.CreateEmptyVariable(label, nrecs, datetime=ldt["Data"])
        var["Data"] = data
        var["Flag"] = numpy.where(numpy.ma.getmaskarray(var["Data"]), ones, zeros)
        var["Attr"] = attr
        pfp_utils.CreateVariable(ds_its, var)
        return var

    # now add the other variables
    # wind direction
    add_variable("Wd", numpy.ma.masked_equal(df["Wd"].values, 999),
                 {"long_name": "Wind direction", "statistic_type": "average",
                  "standard_name": "wind_from_direction", "units": "degrees"})
    # wind speed
    add_variable("Ws", numpy.ma.masked_equal(df["Ws"].values, 999.9),
                 {"long_name": "Wind speed", "statistic_type": "average",
                  "standard_name": "wind_speed", "units": "m/s"})
    # air temperature
    Ta = add_variable("Ta", numpy.ma.masked_equal(df["Ta"].values, 999.9),
                      {"long_name": "Air temperature", "statistic_type": "average",
                       "standard_name": "air_temperature", "units": "degC"})
    # dew point temperature
    Td = add_variable("Td", numpy.ma.masked_equal(df["Td"].values, 999.9),
                      {"long_name": "Dew point temperature", "statistic_type": "average",
                       "standard_name": "dew_point_temperature", "units": "degC"})
    # surface pressure, the pressure read from the file is scaled by a factor
    # that depends on the site altitude and the air temperature
    site_altitude = float(ds_its.globalattributes["altitude"])
    cfac = numpy.ma.exp((-1 * site_altitude) / ((Ta["Data"] + 273.15) * 29.263))
    # the missing value in the file is 99999 which becomes 999.99 after the
    # division by 100 above (9999.9 was previously masked which never matched)
    missing_pressure = 99999 / float(100)
    ps_data = numpy.ma.masked_equal(df["ps"].values, missing_pressure) * cfac
    ps = add_variable("ps", ps_data,
                      {"long_name": "Surface pressure", "statistic_type": "average",
                       "standard_name": "surface_air_pressure", "units": "kPa"})
    # precipitation
    add_variable("Precip", numpy.ma.masked_equal(df["Precip"].values, 999.9),
                 {"long_name": "Rainfall", "statistic_type": "sum",
                  "standard_name": "thickness_of_rainfall_amount", "units": "mm"})
    # relative humidity (attribute key corrected to "statistic_type")
    RH = add_variable("RH", mf.relativehumidityfromdewpoint(Td["Data"], Ta["Data"]),
                      {"long_name": "Relative humidity", "statistic_type": "average",
                       "standard_name": "relative_humidity", "units": "percent"})
    # absolute humidity
    add_variable("AH", mf.absolutehumidityfromrelativehumidity(Ta["Data"], RH["Data"]),
                 {"long_name": "Absolute humidity", "statistic_type": "average",
                  "standard_name": "mass_concentration_of_water_vapor_in_air",
                  "units": "g/m^3"})
    # specific humidity
    add_variable("SH", mf.specifichumidityfromrelativehumidity(
                     RH["Data"], Ta["Data"], ps["Data"]),
                 {"long_name": "Specific humidity", "statistic_type": "average",
                  "standard_name": "specific_humidity", "units": "kg/kg"})
    return ds_its
def gfMDS_get_mds_output(ds, mds_label, out_file_path, l5_info, called_by):
    """
    Purpose:
     Load the CSV file written by the MDS C code and copy its columns into
     the data structure.
    Usage:
     gfMDS_get_mds_output(ds, mds_label, out_file_path, l5_info, called_by)
     where ds is a data structure
           mds_label is the label given to the gap filled output variable
           out_file_path is the full path to the MDS output file
           l5_info is the settings dictionary; the entry
                   l5_info[called_by]["outputs"][mds_label] supplies the
                   "target" and "target_mds" names
           called_by is the key of the calling routine in l5_info
    Side effects:
     The "FILLED" column is written to the data structure as mds_label.
     Every other column, apart from "TIMESTAMP" and the "target_mds"
     column, is written as "MDS_<target>_<column name>".
    Author: PRI
    Date: May 2018
    """
    # the MDS QC flag is derived from the processing level: the numeric part
    # of the level times 100 plus 70 (e.g. 470 at L4, 570 at L5)
    processing_level = ds.root["Attributes"]["processing_level"]
    level_digits = pfp_utils.strip_non_numeric(processing_level)
    mds_flag_value = 100 * int(level_digits) + 70
    # name of the level-specific description attribute
    descr_level = "description_" + str(processing_level)
    # the period covered by the data structure
    time_var = pfp_utils.GetVariable(ds, "DateTime")
    first_date = time_var["Data"][0]
    last_date = time_var["Data"][-1]
    # read the MDS output file and parse its time stamps
    data_mds = numpy.genfromtxt(out_file_path, delimiter=",", names=True,
                                autostrip=True, dtype=None)
    dt_mds = numpy.array([dateutil.parser.parse(str(stamp))
                          for stamp in data_mds["TIMESTAMP"]])
    # indices in the MDS output of the first and last dates in the data structure
    si_mds = pfp_utils.GetDateIndex(dt_mds, first_date)
    ei_mds = pfp_utils.GetDateIndex(dt_mds, last_date)
    # settings for this output
    outputs = l5_info[called_by]["outputs"][mds_label]
    # columns to process: everything except the time stamp and the original data
    excluded = ("TIMESTAMP", outputs["target_mds"])
    column_names = [name for name in data_mds.dtype.names
                    if name not in excluded]
    for column in column_names:
        if column == "FILLED":
            # the gap filled target
            target_var = pfp_utils.GetVariable(ds, outputs["target"])
            filled = data_mds[column][si_mds:ei_mds + 1]
            # points that were masked in the target and for which MDS
            # returned something other than the missing value get the MDS flag
            was_gap = numpy.ma.getmaskarray(target_var["Data"])
            has_value = abs(filled - c.missing_value) > c.eps
            filled_idx = numpy.where(was_gap & has_value)[0]
            flag = numpy.array(target_var["Flag"])
            flag[filled_idx] = numpy.int32(mds_flag_value)
            # work on a copy so the target's attributes are left untouched
            attr = copy.deepcopy(target_var["Attr"])
            pfp_utils.append_to_attribute(
                attr, {descr_level: "Gap filled using MDS"})
            pfp_utils.CreateVariable(ds, {"Label": mds_label,
                                          "Data": filled,
                                          "Flag": flag,
                                          "Attr": attr})
        else:
            # remaining columns are stored as MDS_<target>_<column name>
            target = outputs["target"]
            qc_label = "_".join(("MDS", target, column))
            values = data_mds[column][si_mds:ei_mds + 1]
            if column == "TIMEWINDOW":
                long_name = "TIMEWINDOW from MDS gap filling for " + target
            else:
                long_name = "QC field from MDS gap filling for " + target
            pfp_utils.CreateVariable(ds, {"Label": qc_label,
                                          "Data": values,
                                          "Flag": numpy.zeros(len(values)),
                                          "Attr": {"long_name": long_name}})
    return