Esempio n. 1
0
def do_EPQCFlagCheck(cf, ds, section, series, code=9):
    """
    Purpose:
     Mask data according to the value of an EddyPro QC flag.
     The variable named by "series" is set to the missing value, and its QC
     flag set to "code", at every record where any of the EddyPro flag
     variables listed in "Source" matches one of the values in "Reject".
    Usage:
     do_EPQCFlagCheck(cf, ds, section, series)
     where cf is a control file
           ds is a data structure
           section is the control file section containing "series"
           series is the label of the variable to be masked
           code is the QC flag value assigned to rejected records
    Author: PRI
    Date: August 2017
    """
    # nothing to do unless an EPQCFlagCheck entry exists for this series
    if 'EPQCFlagCheck' not in cf[section][series].keys():
        return
    nRecs = int(ds.globalattributes["nc_nrecs"])
    # rejection flag, 1 at records to be masked
    flag = numpy.zeros(nRecs, dtype=numpy.int32)
    source_list = ast.literal_eval(
        cf[section][series]['EPQCFlagCheck']["Source"])
    reject_list = ast.literal_eval(
        cf[section][series]['EPQCFlagCheck']["Reject"])
    variable = qcutils.GetVariable(ds, series)
    # accumulate rejections over all sources and all reject values
    for source in source_list:
        epflag = qcutils.GetVariable(ds, source)
        for value in reject_list:
            # isclose() because the EddyPro flags are read as floats
            bool_array = numpy.isclose(epflag["Data"], float(value))
            idx = numpy.where(bool_array)[0]
            flag[idx] = numpy.int32(1)
    idx = numpy.where(flag == 1)[0]
    # numpy.float was removed in numpy 1.24, use the builtin float
    variable["Data"][idx] = float(c.missing_value)
    # use the "code" argument instead of a hard-coded 9 (default preserves
    # the previous behaviour)
    variable["Flag"][idx] = numpy.int32(code)
    qcutils.CreateVariable(ds, variable)
    return
Esempio n. 2
0
def interpolate_precip(ds_in, x1, ds_out, x2):
    """
    Purpose:
     Transfer precipitation data from one time stamp to another.
     Precipitation is not interpolated because interpolation would change
     the precipitation total and because precipitation is intermittent, so
     its behaviour at the missing times is unknown.  Instead, each input
     precipitation total is assigned to the matching output time stamp.
    Assumptions:
     That the input time stamps are a subset of the output time stamps.
    Usage:
     interpolate_precip(ds_in, x1, ds_out, x2)
     where ds_in is the input data structure
           x1 is the input time as a number
           ds_out is the output data structure
           x2 is the output time as a number
    Side effects:
     A "Precip" variable is created in ds_out.
    Author: PRI
    Date: December 2017
    """
    nrecs = ds_out.globalattributes["nc_nrecs"]
    # local pointer to the output datetime
    dt_out = qcutils.GetVariable(ds_out, "DateTime")
    # start from an empty output variable on the output time stamps
    precip_out = qcutils.create_empty_variable("Precip", nrecs, datetime=dt_out["Data"])
    # zero the data array
    precip_out["Data"] = precip_out["Data"] * float(0)
    precip_in = qcutils.GetVariable(ds_in, "Precip")
    # indices in the output series of the times common to both series
    common_times = numpy.intersect1d(x2, x1)
    match_idx = numpy.searchsorted(x2, common_times)
    # assign the input totals to the matching output time stamps
    precip_out["Data"][match_idx] = precip_in["Data"]
    precip_out["Flag"] = numpy.zeros(nrecs, dtype=numpy.int32)
    precip_out["Attr"] = copy.deepcopy(precip_in["Attr"])
    qcutils.CreateVariable(ds_out, precip_out)
    return
Esempio n. 3
0
def interpolate_ds(ds_in, ts):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
     A regular datetime series at the requested time step is generated, the
     scalar quantities (Ta, Td, ps, RH, Ah, q) are interpolated, wind speed
     and direction are interpolated via their U and V components and
     precipitation totals are transferred (not interpolated) to the matching
     output times.
    Assumptions:
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
     where ds_in is the input data structure
           ts is the output time step in minutes
    Author: PRI
    Date: June 2017
    """
    logger.info("Interpolating data")
    # instance the output data structure
    ds_out = qcio.DataStructure()
    # copy the global attributes
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # generate a regular time series at the required time step
    dt = ds_in.series["DateTime"]["Data"]
    # round the start time to an integral of the time step ...
    dt0 = qcutils.rounddttots(dt[0], ts=ts)
    # ... and make sure the output series starts no earlier than the input
    if dt0 < dt[0]:
        dt0 = dt0 + datetime.timedelta(minutes=ts)
    # round the end time to an integral of the time step ...
    dt1 = qcutils.rounddttots(dt[-1], ts=ts)
    # ... and make sure the output series ends no later than the input
    if dt1 > dt[-1]:
        dt1 = dt1 - datetime.timedelta(minutes=ts)
    # the regular output datetime series
    idt = [result for result in qcutils.perdelta(dt0, dt1, datetime.timedelta(minutes=ts))]
    # input and output times as numbers for the 1D interpolation routine
    x1 = numpy.array([toTimestamp(dt[i]) for i in range(len(dt))])
    x2 = numpy.array([toTimestamp(idt[i]) for i in range(len(idt))])
    # loop over the series in the data structure and interpolate
    flag = numpy.zeros(len(idt), dtype=numpy.int32)
    attr = {"long_name":"Datetime", "units":"none"}
    ldt_var = {"Label":"DateTime", "Data":idt, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds_out, ldt_var)
    qcutils.get_nctime_from_datetime(ds_out)
    nrecs = len(idt)
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # first, we do the air temperature, dew point temperature and surface pressure
    # QC flag values: 0 = OK, 1 = masked after interpolation
    f0 = numpy.zeros(nrecs, dtype=numpy.int32)
    f1 = numpy.ones(nrecs, dtype=numpy.int32)
    for label in ["Ta", "Td", "ps", "RH", "Ah", "q"]:
        var_out = qcutils.create_empty_variable(label, nrecs, datetime=idt)
        var_in = qcutils.GetVariable(ds_in, label)
        # interpolate onto the output time stamps
        var_out["Data"] = interpolate_1d(x1, var_in["Data"], x2)
        # flag any output values that are still masked after interpolation
        var_out["Flag"] = numpy.where(numpy.ma.getmaskarray(var_out["Data"])==True, f1, f0)
        var_out["Attr"] = copy.deepcopy(var_in["Attr"])
        qcutils.CreateVariable(ds_out, var_out)
    # now clamp the dew point so that TD <= TA
    Ta = qcutils.GetVariable(ds_out, "Ta")
    Td = qcutils.GetVariable(ds_out, "Td")
    Td["Data"] = numpy.ma.where(Td["Data"]<=Ta["Data"], x=Td["Data"], y=Ta["Data"])
    qcutils.CreateVariable(ds_out, Td)
    # now we do wind speed and direction by converting to U and V components
    interpolate_wswd(ds_in, x1, ds_out, x2)
    # and lastly, do precipitation
    interpolate_precip(ds_in, x1, ds_out, x2)

    return ds_out
Esempio n. 4
0
def interpolate_wswd(ds_in, x1, ds_out, x2):
    """
    Purpose:
     Interpolate wind speed and direction by converting them to U and V
     components first, doing the interpolation and then converting back
     to wind speed and direction.
    Usage:
     interpolate_wswd(ds_in, x1, ds_out, x2)
     where ds_in is the input data structure
           x1 is the input time as a number
           ds_out is the output data structure
           x2 is the output time as a number
    Side effects:
     Variables u, v, Ws and Wd are created in ds_out.
    Author: PRI
    Date: December 2017
    """
    # get the number of records in the output data
    nrecs = ds_out.globalattributes["nc_nrecs"]
    # QC flag values: 0 = OK, 1 = masked after interpolation
    f0 = numpy.zeros(nrecs, dtype=numpy.int32)
    f1 = numpy.ones(nrecs, dtype=numpy.int32)
    # local pointer to the output datetime
    ldt = qcutils.GetVariable(ds_out, "DateTime")
    # create empty variables for the interpolated U and V components
    # (the empty Ws and Wd variables previously created here were dead code,
    # convert_UVtoWSWD() returns new variables)
    U_out = qcutils.create_empty_variable("u", nrecs, datetime=ldt["Data"])
    V_out = qcutils.create_empty_variable("v", nrecs, datetime=ldt["Data"])
    # get the input wind speed and direction
    Ws_in = qcutils.GetVariable(ds_in, "Ws")
    Wd_in = qcutils.GetVariable(ds_in, "Wd")
    # convert to U and V components
    U_in, V_in = qcutils.convert_WSWDtoUV(Ws_in, Wd_in)
    # interpolate the components to the output time stamp
    U_out["Data"] = interpolate_1d(x1, U_in["Data"], x2)
    V_out["Data"] = interpolate_1d(x1, V_in["Data"], x2)
    # add the QC flag and update variable attributes
    # use numpy.where (not numpy.ma.where) so the flags are plain integer
    # arrays, consistent with interpolate_ds()
    U_out["Flag"] = numpy.where(numpy.ma.getmaskarray(U_out["Data"]), f1, f0)
    V_out["Flag"] = numpy.where(numpy.ma.getmaskarray(V_out["Data"]), f1, f0)
    U_out["Attr"]["long_name"] = "U component of wind velocity, positive east"
    U_out["Attr"]["units"] = "m/s"
    V_out["Attr"]["long_name"] = "V component of wind velocity, positive north"
    V_out["Attr"]["units"] = "m/s"
    # write the U and V components to the output data structure
    qcutils.CreateVariable(ds_out, U_out)
    qcutils.CreateVariable(ds_out, V_out)
    # convert the interpolated components back to wind speed and direction
    Ws_out, Wd_out = qcutils.convert_UVtoWSWD(U_out, V_out)
    # add the QC flag and update the variable attributes
    Ws_out["Flag"] = numpy.where(numpy.ma.getmaskarray(Ws_out["Data"]), f1, f0)
    Wd_out["Flag"] = numpy.where(numpy.ma.getmaskarray(Wd_out["Data"]), f1, f0)
    Ws_out["Attr"] = copy.deepcopy(Ws_in["Attr"])
    Wd_out["Attr"] = copy.deepcopy(Wd_in["Attr"])
    # write the wind speed and direction into the output data structure
    qcutils.CreateVariable(ds_out, Ws_out)
    qcutils.CreateVariable(ds_out, Wd_out)
    return
Esempio n. 5
0
def run_mpt_code(ds, nc_file_name):
    """
    Purpose:
     Write one CSV input file per year of data and run the MPT u* threshold
     C code (ustar_mp) on each file, collecting the paths of the output
     files that were produced.
    Usage:
     out_file_paths = run_mpt_code(ds, nc_file_name)
     where ds is a data structure
           nc_file_name is the netCDF file name used to derive the CSV
           input file names
    Returns:
     A dictionary of output file paths keyed by year; years for which the
     MPT code produced no output file are absent.
    Side effects:
     Writes CSV files under mpt/input, runs the ustar_mp executable and
     appends its console output to mpt/log/mpt.log.
    Author: PRI
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    out_file_paths = {}
    header = "TIMESTAMP,NEE,VPD,USTAR,TA,SW_IN,H,LE"
    fmt = "%12i,%f,%f,%f,%f,%f,%f,%f"
    first_year = ldt["Data"][0].year
    last_year = ldt["Data"][-1].year
    log_file_path = os.path.join("mpt", "log", "mpt.log")
    in_base_path = os.path.join("mpt", "input", "")
    out_base_path = os.path.join("mpt", "output", "")
    # the executable path is loop invariant, compute it once
    ustar_mp_exe = os.path.join(".", "mpt", "bin", "ustar_mp")
    # use a context manager so the log file is closed even if savetxt or
    # the subprocess call raises
    with open(log_file_path, "wb") as mptlogfile:
        for current_year in range(first_year, last_year + 1):
            in_name = nc_file_name.replace(".nc",
                                           "_" + str(current_year) + "_MPT.csv")
            in_full_path = os.path.join(in_base_path, in_name)
            out_full_path = in_full_path.replace("input", "output").replace(
                ".csv", "_ut.txt")
            data = make_data_array(ds, current_year)
            numpy.savetxt(in_full_path,
                          data,
                          header=header,
                          delimiter=",",
                          comments="",
                          fmt=fmt)
            cmd = [
                ustar_mp_exe, "-input_path=" + in_full_path,
                "-output_path=" + out_base_path
            ]
            subprocess.call(cmd, stdout=mptlogfile)
            # only record the output path if the MPT code produced a file
            if os.path.isfile(out_full_path):
                out_file_paths[current_year] = out_full_path
    return out_file_paths
Esempio n. 6
0
def gfMDS_make_data_array(ds, current_year, info):
    """
    Purpose:
     Create a data array for the MDS gap filling routine.  The array constructed
     here will be written to a CSV file that is read by the MDS C code.
    Usage:
     data, header, fmt = gfMDS_make_data_array(ds, current_year, info)
     where ds is a data structure
           current_year is the year to extract
           info is a dictionary describing the target and driver variables
    Side Effects:
     The constructed data arrays are full years.  That is they run from YYYY-01-01 00:30
     to YYYY+1-01-01 00:00.  Missing data is represented as -9999.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = ds.globalattributes["nc_nrecs"]
    ts = int(ds.globalattributes["time_step"])
    # full-year span at this site's time step
    start = datetime.datetime(current_year, 1, 1, 0, 30, 0)
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array(list(qcutils.perdelta(start, end, datetime.timedelta(minutes=ts))))
    # column of missing values (-9999)
    missing = numpy.ones(len(cdt)) * float(-9999)
    # one column for the timestamp, one for the target and one per driver
    columns = [cdt, missing] + [missing for _ in info["drivers"]]
    data = numpy.stack(columns, axis=-1)
    # indices of the requested year in the data structure
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    # rows of the full-year array that have data in the data structure
    row_idx, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    pfp_labels = [info["target"]] + info["drivers"]
    mds_labels = [info["target_mds"]] + info["drivers_mds"]
    header_parts = ["TIMESTAMP"]
    fmt_parts = ["%12i"]
    # fill the target and driver columns and build the header and format
    for n, label in enumerate(pfp_labels):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[row_idx, n + 1] = var["Data"]
        header_parts.append(mds_labels[n])
        fmt_parts.append("%f")
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data, ",".join(header_parts), ",".join(fmt_parts)
Esempio n. 7
0
def mask_long_gaps(ds_int, ds_avg, max_interp_length):
    """
    Purpose:
     Mask, in the interpolated data, any gaps in the averaged data that are
     longer than max_interp_length records.  This stops the interpolation
     from inventing data across long gaps.
    Assumptions:
     That ds_int and ds_avg are on the same time step.
    Usage:
     ds_mlg = mask_long_gaps(ds_int, ds_avg, max_interp_length)
     where ds_int is the interpolated data structure
           ds_avg is the averaged (pre-interpolation) data structure
           max_interp_length is the maximum gap length (records) to
           interpolate across
    Side effects:
    Author: PRI
    Date: December 2017
    """
    logger.info("Masking long gaps")
    # make a copy of the interpolated data structure
    ds_mlg = copy.deepcopy(ds_int)
    # get a list of the variable labels in this group
    labels = [label for label in ds_avg.series.keys() if label not in ["DateTime","time"]]
    # loop over the variables in this group
    for label in labels:
        # check to see if this variable exists in the interpolated data structure
        if label not in ds_int.series:
            msg = "mask_long_gaps: variable "+label+" not found, skipping ..."
            logger.warning(msg)
            continue
        # get the average and interpolated variables
        var_avg = qcutils.GetVariable(ds_avg, label)
        var_int = qcutils.GetVariable(ds_int, label)
        # make a copy of the interpolated variable as a base for the output variable
        var_mlg = copy.deepcopy(var_int)
        # boolean array, True where the average data is masked
        # (getmaskarray already returns a boolean array, the previous
        # numpy.ma.where(mask == True, True, False) was a redundant copy)
        cond_bool = numpy.ma.getmaskarray(var_avg["Data"])
        # 2D array of start and end indices for gaps longer than max_interp_length
        cidx = contiguous_regions(cond_bool, max_interp_length=max_interp_length)
        # loop over gaps longer than max_interp_length
        for start, stop in cidx:
            # assign numpy.ma.masked instead of indexing .mask directly,
            # which fails when the mask is numpy.ma.nomask (a scalar)
            var_mlg["Data"][start:stop] = numpy.ma.masked
            var_mlg["Flag"][start:stop] = 1
        # write the masked variable back to the output data structure
        qcutils.CreateVariable(ds_mlg, var_mlg)
    # return a copy of the interpolated data structure with gaps longer than max_interp_length masked
    return ds_mlg
Esempio n. 8
0
def make_data_array(ds, current_year):
    """
    Purpose:
     Build the full-year data array written to the MPT input CSV file.
     Columns are TIMESTAMP, Fc, VPD, ustar, Ta, Fsd, Fh and Fe; missing
     data is represented as -9999 and the timestamps are integer
     YYYYMMDDHHMM values.
    Usage:
     data = make_data_array(ds, current_year)
     where ds is a data structure
           current_year is the year to extract
    Author: PRI
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    nrecs = ds.globalattributes["nc_nrecs"]
    ts = int(ds.globalattributes["time_step"])
    # full-year span at this site's time step
    start = datetime.datetime(current_year, 1, 1, 0, 30, 0)
    end = datetime.datetime(current_year + 1, 1, 1, 0, 0, 0)
    cdt = numpy.array([
        dt
        for dt in qcutils.perdelta(start, end, datetime.timedelta(minutes=ts))
    ])
    # 8 columns: timestamp plus 7 data series, initialised to missing
    mt = numpy.ones(len(cdt)) * float(-9999)
    data = numpy.stack([cdt, mt, mt, mt, mt, mt, mt, mt], axis=-1)
    # indices of the requested year in the data structure
    si = qcutils.GetDateIndex(ldt["Data"], start, default=0)
    ei = qcutils.GetDateIndex(ldt["Data"], end, default=nrecs)
    dt = qcutils.GetVariable(ds, "DateTime", start=si, end=ei)
    # only the indices into the full-year array are used (the second set
    # of indices was previously assigned but never used)
    idx1, _ = qcutils.FindMatchingIndices(cdt, dt["Data"])
    for n, label in enumerate(["Fc", "VPD", "ustar", "Ta", "Fsd", "Fh", "Fe"]):
        var = qcutils.GetVariable(ds, label, start=si, end=ei)
        data[idx1, n + 1] = var["Data"]
    # convert datetime to ISO dates
    data[:, 0] = numpy.array([int(xdt.strftime("%Y%m%d%H%M")) for xdt in cdt])
    return data
Esempio n. 9
0
def ConvertK2C(ds, T_in, T_out):
    """
    Purpose:
     Convert a temperature series from Kelvin to Celsius and write the
     result back to the data structure under a new label.
    Usage:
     qcfunc.ConvertK2C(ds, T_in, T_out)
     where ds is a data structure
           T_in is the label of the Kelvin temperature variable
           T_out is the label for the converted Celsius variable
    Author: PRI
    Date: February 2018
    """
    temperature = qcutils.GetVariable(ds, T_in)
    converted = qcutils.convert_units_func(ds, temperature, "C", mode="quiet")
    converted["Label"] = T_out
    qcutils.CreateVariable(ds, converted)
    return 1
Esempio n. 10
0
def ConvertPa2kPa(ds, ps_in, ps_out):
    """
    Purpose:
     Convert a pressure series from Pa to kPa and write the result back to
     the data structure under a new label.
    Usage:
     qcfunc.ConvertPa2kPa(ds, ps_in, ps_out)
     where ds is a data structure
           ps_in is the label of the Pa pressure variable
           ps_out is the label for the converted kPa variable
    Author: PRI
    Date: February 2018
    """
    pressure = qcutils.GetVariable(ds, ps_in)
    converted = qcutils.convert_units_func(ds, pressure, "kPa", mode="quiet")
    converted["Label"] = ps_out
    qcutils.CreateVariable(ds, converted)
    return 1
Esempio n. 11
0
             ds_all.globalattributes[gattr] = ds_out[i].globalattributes[gattr]
     # and update the site specific global attributes
     ds_all.globalattributes["time_zone_"+isd_site_id] = ds_out[i].globalattributes["time_zone"]
     ds_all.globalattributes["latitude_"+isd_site_id] = ds_out[i].globalattributes["latitude"]
     ds_all.globalattributes["longitude_"+isd_site_id] = ds_out[i].globalattributes["longitude"]
     ds_all.globalattributes["altitude_"+isd_site_id] = ds_out[i].globalattributes["altitude"]
     # now copy the variables
     # first, we get the indices of matching datetimes
     ldt_one = ds_out[i].series["DateTime"]["Data"]
     idx = numpy.searchsorted(ldt_all, numpy.intersect1d(ldt_all, ldt_one))
     # then we get a list of the variables to copy
     labels = [label for label in ds_out[i].series.keys() if label not in ["DateTime"]]
     # and then we loop over the variables to be copied
     for label in labels:
         # read the data out of the ISD site data structure
         var_out = qcutils.GetVariable(ds_out[i], label)
         # create an empty output variable with a number, unique to each ISD station,
         # appended to the label
         var_all = qcutils.create_empty_variable(label+"_"+str(i), nrecs)
         # copy the variable attributes
         var_all["Attr"] = copy.deepcopy(var_out["Attr"])
         # add the ISD site ID
         var_all["Attr"]["isd_site_id"] = isd_site_id
         # copy the data and flag onto the matching times
         var_all["Data"][idx] = var_out["Data"]
         var_all["Flag"][idx] = var_out["Flag"]
         # put the data, flag and attributes into the all-in-one data structure
         qcutils.CreateVariable(ds_all, var_all)
 # write the netCDF file with the combined data for this year
 if len(fluxnet_id) == 0:
     nc_dir_path = os.path.join(out_base_path,site,"Data","ISD")
Esempio n. 12
0
def gfMDS_plot(cf, pd, ds, mds_label):
    """
    Purpose:
     Plot the results of the MDS gap filling: a composite diurnal plot of
     the observations and the MDS estimates, a histogram of the MDS window
     lengths, summary statistics and time series of the target and each of
     the drivers.  The figure is saved as a PNG under the plot path.
    Usage:
     gfMDS_plot(cf, pd, ds, mds_label)
     where cf is a control file
           pd is a dictionary of plot layout settings (see gfMDS_initplot)
           ds is a data structure
           mds_label is the label of the MDS output variable in ds
    Author: PRI
    """
    ts = int(ds.globalattributes["time_step"])
    drivers = ds.mds[mds_label]["drivers"]
    target = ds.mds[mds_label]["target"]
    # decimal hour of day, used for the diurnal composite
    Hdh = qcutils.GetVariable(ds, "Hdh")
    obs = qcutils.GetVariable(ds, target)
    mds = qcutils.GetVariable(ds, mds_label)
    # interactive mode on only when plots are to be shown on screen
    if pd["show_plots"]:
        plt.ion()
    else:
        plt.ioff()
    fig = plt.figure(pd["fig_num"], figsize=(13, 8))
    fig.clf()
    # NOTE(review): FigureCanvas.set_window_title was removed in
    # matplotlib 3.6 (use fig.canvas.manager.set_window_title) - confirm
    # the matplotlib version this code is pinned to
    fig.canvas.set_window_title(target)
    plt.figtext(0.5, 0.95, pd["title"], ha='center', size=16)

    # diurnal plot
    # XY plot of the diurnal variation
    rect1 = [0.10, pd["margin_bottom"], pd["xy_width"], pd["xy_height"]]
    ax1 = plt.axes(rect1)
    # get the diurnal stats of the observations
    # mask the observations wherever either series is masked so the two
    # diurnal composites are computed over comparable records
    # NOTE(review): assumes obs["Data"] and mds["Data"] are masked arrays
    # with array masks (.mask may be nomask) - confirm upstream
    mask = numpy.ma.mask_or(obs["Data"].mask, mds["Data"].mask)
    obs_mor = numpy.ma.array(obs["Data"], mask=mask)
    _, Hr1, Av1, _, _, _ = gf_getdiurnalstats(Hdh["Data"], obs_mor, ts)
    ax1.plot(Hr1, Av1, 'b-', label="Obs")
    # get the diurnal stats of all SOLO predictions
    _, Hr2, Av2, _, _, _ = gf_getdiurnalstats(Hdh["Data"], mds["Data"], ts)
    ax1.plot(Hr2, Av2, 'r-', label="MDS")
    plt.xlim(0, 24)
    plt.xticks([0, 6, 12, 18, 24])
    ax1.set_ylabel(target)
    ax1.set_xlabel('Hour')
    ax1.legend(loc='upper right', frameon=False, prop={'size': 8})

    # histogram of window size
    time_window = qcutils.GetVariable(ds, "MDS_" + target + "_TIMEWINDOW")
    # flag value 40 marks records that were gap filled by MDS
    idx = numpy.where(mds["Flag"] == 40)[0]
    if len(idx) != 0:
        tw_hist_data = time_window["Data"][idx]
        rect2 = [0.40, pd["margin_bottom"], pd["xy_width"], pd["xy_height"]]
        ax2 = plt.axes(rect2)
        ax2.hist(tw_hist_data)
        ax2.set_ylabel("Occurrence")
        ax2.set_xlabel("MDS window length")

    # write statistics to the plot
    numpoints = numpy.ma.count(obs["Data"])
    # number filled = unmasked in the MDS series but masked in the obs
    numfilled = numpy.ma.count(mds["Data"]) - numpy.ma.count(obs["Data"])
    plt.figtext(0.65, 0.225, 'No. points')
    plt.figtext(0.75, 0.225, str(numpoints))
    plt.figtext(0.65, 0.200, 'No. filled')
    plt.figtext(0.75, 0.200, str(numfilled))
    avg_obs = numpy.ma.mean(obs["Data"])
    avg_mds = numpy.ma.mean(mds["Data"])
    plt.figtext(0.65, 0.175, 'Avg (obs)')
    plt.figtext(0.75, 0.175, '%.4g' % (avg_obs))
    plt.figtext(0.65, 0.150, 'Avg (MDS)')
    plt.figtext(0.75, 0.150, '%.4g' % (avg_mds))
    var_obs = numpy.ma.var(obs["Data"])
    var_mds = numpy.ma.var(mds["Data"])
    plt.figtext(0.65, 0.125, 'Var (obs)')
    plt.figtext(0.75, 0.125, '%.4g' % (var_obs))
    plt.figtext(0.65, 0.100, 'Var (MDS)')
    plt.figtext(0.75, 0.100, '%.4g' % (var_mds))

    # time series of drivers and target
    # the target axes (ts_axes[0]) is created first, driver axes are
    # stacked above it and share its x axis
    ts_axes = []
    rect = [
        pd["margin_left"], pd["ts_bottom"], pd["ts_width"], pd["ts_height"]
    ]
    ts_axes.append(plt.axes(rect))
    ts_axes[0].plot(obs["DateTime"], obs["Data"], 'b.', mds["DateTime"],
                    mds["Data"], 'r-')
    ts_axes[0].set_xlim(obs["DateTime"][0], obs["DateTime"][-1])
    TextStr = target + '_obs (' + obs['Attr']['units'] + ')'
    ts_axes[0].text(0.05,
                    0.85,
                    TextStr,
                    color='b',
                    horizontalalignment='left',
                    transform=ts_axes[0].transAxes)
    TextStr = target + '(' + mds['Attr']['units'] + ')'
    ts_axes[0].text(0.85,
                    0.85,
                    TextStr,
                    color='r',
                    horizontalalignment='right',
                    transform=ts_axes[0].transAxes)
    for i, driver in enumerate(drivers):
        # each driver plotted in its own panel above the previous one
        this_bottom = pd["ts_bottom"] + (i + 1) * pd["ts_height"]
        rect = [
            pd["margin_left"], this_bottom, pd["ts_width"], pd["ts_height"]
        ]
        ts_axes.append(plt.axes(rect, sharex=ts_axes[0]))
        drv = qcutils.GetVariable(ds, driver)
        # separate not-gap-filled (flag == 0) from gap-filled driver data
        drv_notgf = numpy.ma.masked_where(drv["Flag"] != 0, drv["Data"])
        drv_gf = numpy.ma.masked_where(drv["Flag"] == 0, drv["Data"])
        ts_axes[i + 1].plot(drv["DateTime"], drv_notgf, 'b-')
        ts_axes[i + 1].plot(drv["DateTime"], drv_gf, 'r-', linewidth=2)
        plt.setp(ts_axes[i + 1].get_xticklabels(), visible=False)
        TextStr = driver + '(' + drv['Attr']['units'] + ')'
        ts_axes[i + 1].text(0.05,
                            0.85,
                            TextStr,
                            color='b',
                            horizontalalignment='left',
                            transform=ts_axes[i + 1].transAxes)

    # save a hard copy
    sdt = obs["DateTime"][0].strftime("%Y%m%d")
    edt = obs["DateTime"][-1].strftime("%Y%m%d")
    if "plot_path" in cf["Files"]:
        plot_path = cf["Files"]["plot_path"] + "L5/"
    else:
        plot_path = "plots/L5"
    if not os.path.exists(plot_path):
        os.makedirs(plot_path)
    figname = plot_path + pd["site_name"].replace(" ",
                                                  "") + "_MDS_" + pd["label"]
    figname = figname + "_" + sdt + "_" + edt + '.png'
    fig.savefig(figname, format='png')
    # when showing plots, draw and pause so the window refreshes; when not,
    # close the figure to release its memory
    if pd["show_plots"]:
        plt.draw()
        plt.pause(1)
        plt.ioff()
    else:
        plt.close(fig)
        plt.ion()
    return
Esempio n. 13
0
def GapFillFluxUsingMDS(cf, ds):
    """
    Purpose:
     Run the FluxNet C code to implement the MDS gap filling method.
    Usage:
     GapFillFluxUsingMDS(cf, ds)
     where cf is a control file
           ds is a data structure with ds.mds describing the gap fill runs
    Side effects:
     Writes yearly CSV input files under mds/input, runs the MDS C code,
     renames its output, reads the results back into the data structure
     and plots them.
    Author: PRI
    Date: May 2018
    """
    # nothing to do if no MDS gap filling has been requested
    if "mds" not in dir(ds):
        return
    # get the file name
    file_path = cf["Files"]["file_path"]
    file_name = cf["Files"]["in_filename"]
    nc_full_path = os.path.join(file_path, file_name)
    nc_name = os.path.split(nc_full_path)[1]
    # get some useful metadata
    # BUG FIX: the time step was previously read from "nc_nrecs" (the
    # number of records) instead of "time_step"
    ts = int(ds.globalattributes["time_step"])
    site_name = ds.globalattributes["site_name"]
    level = ds.globalattributes["nc_level"]
    # define the MDS input file location
    in_base_path = os.path.join("mds", "input")
    # get a list of CSV files in the input directory
    in_path_files = glob.glob(os.path.join(in_base_path, "*.csv"))
    # and clean them out
    for in_file in in_path_files:
        if os.path.exists(in_file):
            os.remove(in_file)
    # define the MDS output file location
    out_base_path = os.path.join("mds", "output", "")
    # get a list of CSV files in the output directory
    out_path_files = glob.glob(os.path.join(out_base_path, "*.csv"))
    # and clean them out
    for out_file in out_path_files:
        if os.path.exists(out_file):
            os.remove(out_file)
    # get some useful odds and ends
    ldt = qcutils.GetVariable(ds, "DateTime")
    first_year = ldt["Data"][0].year
    last_year = ldt["Data"][-1].year
    # now loop over the series to be gap filled using MDS
    # open a log file for the MDS C code output; the context manager
    # guarantees the file is closed even if the C code or plotting raises
    log_file_path = os.path.join("mds", "log", "mds.log")
    with open(log_file_path, "wb") as mdslogfile:
        for fig_num, mds_label in enumerate(ds.mds):
            logger.info(" Doing MDS gap filling for %s",
                        ds.mds[mds_label]["target"])
            ds.mds[mds_label]["out_base_path"] = out_base_path
            ds.mds[mds_label]["time_step"] = ts
            # make the output file name
            out_name = site_name + "_" + level + "_" + mds_label + "_mds.csv"
            out_file_path = os.path.join(out_base_path, out_name)
            # first, we write the yearly CSV input files
            ds.mds[mds_label]["in_file_paths"] = []
            for current_year in range(first_year, last_year + 1):
                in_name = nc_name.replace(".nc",
                                          "_" + str(current_year) + "_MDS.csv")
                in_file_path = os.path.join(in_base_path, in_name)
                data, header, fmt = gfMDS_make_data_array(ds, current_year,
                                                          ds.mds[mds_label])
                numpy.savetxt(in_file_path,
                              data,
                              header=header,
                              delimiter=",",
                              comments="",
                              fmt=fmt)
                ds.mds[mds_label]["in_file_paths"].append(in_file_path)
            # then we construct the MDS C code command options list
            cmd = gfMDS_make_cmd_string(ds.mds[mds_label])
            # then we spawn a subprocess for the MDS C code
            subprocess.call(cmd, stdout=mdslogfile)
            # rename the generic MDS output file to the per-series name
            mds_out_file = os.path.join("mds", "output", "mds.csv")
            os.rename(mds_out_file, out_file_path)
            # and put the MDS results into the data structure
            gfMDS_get_mds_output(ds, mds_label, out_file_path)
            # plot the MDS results
            target = ds.mds[mds_label]["target"]
            drivers = ds.mds[mds_label]["drivers"]
            title = site_name + ' : Comparison of tower and MDS data for ' + target
            pd = gfMDS_initplot(site_name=site_name,
                                label=target,
                                fig_num=fig_num,
                                title=title,
                                nDrivers=len(drivers),
                                show_plots=True)
            gfMDS_plot(cf, pd, ds, mds_label)
    return
Esempio n. 14
0
def gfMDS_get_mds_output(ds, mds_label, out_file_path, include_qc=False):
    """
    Purpose:
     Reads the CSV file output by the MDS C code and puts the contents into
     the data structure.
    Usage:
     gfMDS_get_mds_output(ds, mds_label, out_file_path, include_qc=False)
     where ds is a data structure
           mds_label is the label of the MDS output variable
           out_file_path is the full path to the MDS output file
           include_qc controls treatment of the MDS QC output
                      True = include QC output
                      False = do not include QC output
    Side effects:
     New series are created in the data structure to hold the MDS data.
    Author: PRI
    Date: May 2018
    """
    ldt = qcutils.GetVariable(ds, "DateTime")
    first_date = ldt["Data"][0]
    last_date = ldt["Data"][-1]
    # read the MDS output CSV file, column names come from the header row
    data_mds = numpy.genfromtxt(out_file_path,
                                delimiter=",",
                                names=True,
                                autostrip=True,
                                dtype=None)
    dt_mds = numpy.array(
        [dateutil.parser.parse(str(dt)) for dt in data_mds["TIMESTAMP"]])
    # indices of the data structure's date range in the MDS output
    si_mds = qcutils.GetDateIndex(dt_mds, first_date)
    ei_mds = qcutils.GetDateIndex(dt_mds, last_date)
    # get a list of the names in the data array
    mds_output_names = list(data_mds.dtype.names)
    # strip out the timestamp and the original data
    for item in ["TIMESTAMP", ds.mds[mds_label]["target_mds"]]:
        if item in mds_output_names:
            mds_output_names.remove(item)
    # check to see if the QC outputs have been requested
    if not include_qc:
        # if not, then remove them from the list of requested outputs
        for item in ["QC", "HAT", "SAMPLE", "STDDEV", "METHOD", "QC_HAT"]:
            if item in mds_output_names:
                mds_output_names.remove(item)
    # and now loop over the MDS output series
    for mds_output_name in mds_output_names:
        data = data_mds[mds_output_name][si_mds:ei_mds + 1]
        if mds_output_name == "FILLED":
            # get the gap filled target and write it to the data structure
            var_in = qcutils.GetVariable(ds, ds.mds[mds_label]["target"])
            # records that were masked in the input and are not missing in
            # the MDS output were gap filled, flag them with 40
            idx = numpy.where((numpy.ma.getmaskarray(var_in["Data"]) == True)
                              & (abs(data - c.missing_value) > c.eps))[0]
            flag = numpy.array(var_in["Flag"])
            flag[idx] = numpy.int32(40)
            attr = copy.deepcopy(var_in["Attr"])
            attr["long_name"] = attr["long_name"] + ", gap filled using MDS"
            var_out = {
                "Label": mds_label,
                "Data": data,
                "Flag": flag,
                "Attr": attr
            }
            qcutils.CreateVariable(ds, var_out)
        else:
            # the TIMEWINDOW and QC outputs differ only in the long_name
            # attribute, handle them in one branch (previously duplicated)
            if mds_output_name == "TIMEWINDOW":
                description = "TIMEWINDOW from MDS gap filling for "
            else:
                description = "QC field from MDS gap filling for "
            # make the series name for the data structure
            mds_qc_label = "MDS" + "_" + ds.mds[mds_label][
                "target"] + "_" + mds_output_name
            # int32 flags for consistency with the rest of the code
            flag = numpy.zeros(len(data), dtype=numpy.int32)
            attr = {"long_name": description + ds.mds[mds_label]["target"]}
            var_out = {
                "Label": mds_qc_label,
                "Data": data,
                "Flag": flag,
                "Attr": attr
            }
            qcutils.CreateVariable(ds, var_out)
    return
Esempio n. 15
0
def average_duplicate_times(ds_in, time_step):
    """
    Purpose:
     Remove duplicate time steps by averaging data with the same time stamp.
     The routine uses scipy.stats.binned_statistic() to bin the data based
     on the time (bins have width time_step and are centered on times that
     are an integral of time_step).
    Usage:
     ds_out = average_duplicate_times(ds_in, time_step=30)
    Side effects:
     The time given for the averages and sums is the end of the time period.
    Author: PRI
    Date: October 2017
    """
    logger.info("Getting data onto a regular time step")
    # get the time as a number (see attr["units"] for units)
    time_var = qcutils.GetVariable(ds_in, "time")
    # generate an array of bin edges for use by binned_statistic()
    bin_width = time_step*60
    # round the ISD start time down to an integral of the time step
    t0 = time_step*60*int(time_var["Data"].data[0]/(time_step*60))
    bin_first = t0 - bin_width
    # round the ISD end time to an integral of the time step
    t1 = time_step*60*int(time_var["Data"].data[-1]/(time_step*60))
    # make sure we go 1 beyond the end time
    if t1 < time_var["Data"][-1]:
        t1 = t1 + bin_width
    # generate an array of bin edges
    bin_last = t1 + bin_width
    bins = numpy.arange(bin_first, bin_last, bin_width)
    # get the number of records in the output series
    nrecs = len(bins)-1
    # generate series of zeros and ones to be used as QC flags
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # create an output data structure with a copy of the input global attributes
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    # update the number of records
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # get a list of variable labels but exclude the datetime and time variables
    # NB: wind speed and direction are recalculated at the end from the
    # averaged velocity components U and V
    labels = [label for label in ds_in.series.keys() if label not in ["DateTime", "time"]]
    # loop over variables
    for label in labels:
        # get the variable
        var_in = qcutils.GetVariable(ds_in, label)
        # indices of non-masked elements
        idx = numpy.ma.where(numpy.ma.getmaskarray(var_in["Data"]) == False)[0]
        # check to see if we have at least 1 data point to deal with
        if len(idx) != 0:
            # get the non-masked data as an ndarray
            data_in = numpy.array(var_in["Data"][idx].data)
            time_in = numpy.array(time_var["Data"][idx].data)
            # use binned_statistic() to average records with the same datetime
            if var_in["Label"][0:1] == "P" and var_in["Attr"]["units"] in ["m", "mm"]:
                # do sum for precipitation
                # NOTE(review): binned_statistic with statistic="sum" returns 0
                # (not NaN) for empty bins, so gaps in precipitation are NOT
                # masked by the isfinite() test below - confirm this is the
                # intended behaviour
                sums, edges, indices = scipy.stats.binned_statistic(time_in, data_in, statistic="sum", bins=bins)
                # convert output to a masked array and mask empty bins
                data_out = numpy.ma.masked_where(numpy.isfinite(sums) == False, numpy.ma.array(sums))
            else:
                # do average for everything else (empty bins give NaN, masked below)
                means, edges, indices = scipy.stats.binned_statistic(time_in, data_in, statistic="mean", bins=bins)
                # convert output to a masked array and mask empty bins
                data_out = numpy.ma.masked_where(numpy.isfinite(means) == False, numpy.ma.array(means))
            # generate the QC flag (1 where the binned value is masked)
            flag_out = numpy.where(numpy.ma.getmaskarray(data_out) == True, f1, f0)
            # and create the output variable
            var_out = {"Label":label, "Data":data_out, "Flag":flag_out, "Attr":var_in["Attr"]}
        else:
            # no data, so create an empty output variable
            var_out = {"Label":label, "Data":numpy.ma.masked_all(nrecs), "Flag":f1,
                       "Attr":var_in["Attr"]}
        # and write the output variable to the output data structure
        qcutils.CreateVariable(ds_out, var_out)
    # the output time stamps are the right-hand (end-of-period) bin edges;
    # derive them from the bins array rather than the binned_statistic() output
    # so this also works when no variable had any data (in which case "edges"
    # would be undefined and the old code raised NameError)
    bin_ends = bins[1:]
    # and convert these to a series of Python datetimes
    attr = copy.deepcopy(ds_in.series["DateTime"]["Attr"])
    ldt_out = {"Label":"DateTime", "Data":netCDF4.num2date(bin_ends, time_var["Attr"]["units"]),
               "Flag":f0, "Attr":attr}
    # and write the datetime to the output data structure
    qcutils.CreateVariable(ds_out, ldt_out)
    qcutils.get_nctime_from_datetime(ds_out)
    # get wind speed and direction from the averaged components
    U = qcutils.GetVariable(ds_out, "u")
    V = qcutils.GetVariable(ds_out, "v")
    WS, WD = qcutils.convert_UVtoWSWD(U, V)
    qcutils.CreateVariable(ds_out, WS)
    qcutils.CreateVariable(ds_out, WD)
    return ds_out
# Example 16
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a data structure.
     Fields are decoded from fixed column positions in each record (NOAA ISD
     format).  Missing or unparsable values are replaced with the variable's
     missing_value code and masked later.
    Assumptions:
     Fixed-width ISD record layout; only hourly observation codes (see
     OK_obs_code below) are retained.
    Usage:
     ds = read_isd_file(isd_file_path)
    Author: PRI
    Date: June 2017
    """
    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file "+isd_file_name
    logger.info(msg)
    # the site ID is the first two hyphen-separated fields of the file name
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0]+"-"+isd_site_id[1]
    # read the file, transparently handling gzip compression
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude from the fixed columns
    # of the first record (lat/lon are stored as integer thousandths of a degree)
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34])/float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41])/float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the data structure; the missing_value codes match the ISD
    # convention of all-nines sentinels
    isd = {}
    isd["DateTime"] = {"Data":[],"Flag":[],"Attr":{"long_name":"Datetime","units":"none"}}
    isd["Wd"] = {"Data":[],"Attr":{"long_name":"Wind direction","units":"degrees","missing_value":999}}
    isd["Ws"] = {"Data":[],"Attr":{"long_name":"Wind speed","units":"m/s","missing_value":999.9}}
    isd["Ta"] = {"Data":[],"Attr":{"long_name":"Air temperature","units":"C","missing_value":999.9}}
    isd["Td"] = {"Data":[],"Attr":{"long_name":"Dew point temperature","units":"C","missing_value":999.9}}
    isd["ps"] = {"Data":[],"Attr":{"long_name":"Surface pressure","units":"kPa","missing_value":9999.9}}
    isd["Precip"] = {"Data":[],"Attr":{"long_name":"Precipitation","units":"mm","missing_value":999.9}}
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ","CRN05","CRN15","FM-12","FM-15","FM-16","SY-MT"]
    # iterate over the lines in the file and decode the data
    # NOTE(review): range(len(content)-1) skips the final record - presumably
    # deliberate (last line may be incomplete), confirm
    for i in range(len(content)-1):
    #for i in range(10):
        # filter out anything other than hourly data
        if content[i][41:46] not in OK_obs_code: continue
        # decode the time stamp fields
        YY = int(content[i][15:19])
        MM = int(content[i][19:21])
        DD = int(content[i][21:23])
        HH = int(content[i][23:25])
        mm = int(content[i][25:27])
        dt = datetime.datetime(YY,MM,DD,HH,mm,0)
        #isd["DateTime"]["Data"].append(pytz.utc.localize(dt))
        isd["DateTime"]["Data"].append(dt)
        # each field below falls back to its missing_value code when the
        # fixed-width slice can not be converted to a number
        # wind direction, degT
        try:
            isd["Wd"]["Data"].append(float(content[i][60:63]))
        except:
            isd["Wd"]["Data"].append(float(999))
        # wind speed, m/s (stored as tenths)
        try:
            isd["Ws"]["Data"].append(float(content[i][65:69])/float(10))
        except:
            isd["Ws"]["Data"].append(float(999.9))
        # air temperature, C (stored as tenths)
        try:
            isd["Ta"]["Data"].append(float(content[i][87:92])/float(10))
        except:
            isd["Ta"]["Data"].append(float(999.9))
        # dew point temperature, C (stored as tenths)
        try:
            isd["Td"]["Data"].append(float(content[i][93:98])/float(10))
        except:
            isd["Td"]["Data"].append(float(999.9))
        # sea level pressure, hPa (stored as tenths); converted to station
        # pressure in kPa further down
        try:
            isd["ps"]["Data"].append(float(content[i][99:104])/float(10))
        except:
            isd["ps"]["Data"].append(float(9999.9))
        # precipitation, mm (only present when the AA1 additional-data
        # identifier is found at the expected columns)
        if content[i][108:111] == "AA1":
            try:
                isd["Precip"]["Data"].append(float(content[i][113:117])/float(10))
            except:
                isd["Precip"]["Data"].append(float(999.9))
        else:
            isd["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    isd["DateTime"]["Attr"]["time_zone"] = "UTC"
    # get the number of records and add this to the global attributes
    nrecs = len(isd["DateTime"]["Data"])
    ds.globalattributes["nc_nrecs"] = str(nrecs)
    # define the QC flags (0 = good, 1 = missing/rejected)
    f0 = numpy.zeros(len(isd["DateTime"]["Data"]))
    f1 = numpy.ones(len(isd["DateTime"]["Data"]))
    # deal with the datetime first
    variable = {"Label":"DateTime", "Data":numpy.array(isd["DateTime"]["Data"]),
                "Flag":f0, "Attr":isd["DateTime"]["Attr"]}
    qcutils.CreateVariable(ds, variable)
    # get the nominal time step as the mode of the time differences, in minutes
    dt_delta = qcutils.get_timestep(ds)
    ts = scipy.stats.mode(dt_delta)[0]/60
    ds.globalattributes["time_step"] = ts[0]
    # add the variables to the data structure
    logger.info("Writing data to the data structure")
    labels = [label for label in isd.keys() if label != "DateTime"]
    for label in labels:
        # mask the missing_value sentinels and flag them
        data = numpy.ma.masked_equal(isd[label]["Data"], isd[label]["Attr"]["missing_value"])
        flag = numpy.where(numpy.ma.getmaskarray(data) == True, f1, f0)
        attr = isd[label]["Attr"]
        variable = {"Label":label, "Data":data, "Flag":flag, "Attr":attr}
        qcutils.CreateVariable(ds, variable)
    # hPa to kPa
    ps = qcutils.GetVariable(ds, "ps")
    ps["Data"] = ps["Data"]/float(10)
    # convert sea level pressure to station pressure using the hypsometric
    # correction based on site altitude and air temperature
    site_altitude = float(ds.globalattributes["altitude"])
    Ta = qcutils.GetVariable(ds, "Ta")
    cfac = numpy.ma.exp((-1*site_altitude)/((Ta["Data"]+273.15)*29.263))
    ps["Data"] = ps["Data"]*cfac
    ps["Attr"]["long_name"] = ps["Attr"]["long_name"]+", adjusted from sea level to station"
    qcutils.CreateVariable(ds, ps)
    # do precipitation and apply crude limits (0 to 100 mm)
    Precip = qcutils.GetVariable(ds, "Precip")
    condition = (Precip["Data"]<0)|(Precip["Data"]>100)
    Precip["Data"] = numpy.ma.masked_where(condition, Precip["Data"])
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"])==True, f1, f0)
    Precip["Attr"]["RangeCheck_upper"] = 100
    Precip["Attr"]["RangeCheck_lower"] = 0
    qcutils.CreateVariable(ds, Precip)
    # get the humidities (relative, absolute and specific) from Td
    Ta = qcutils.GetVariable(ds, "Ta")
    Td = qcutils.GetVariable(ds, "Td")
    ps = qcutils.GetVariable(ds, "ps")
    RH = mf.RHfromdewpoint(Td["Data"], Ta["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(RH)==True, f1, f0)
    attr = {"long_name":"Relative humidity", "units":"%"}
    variable = {"Label":"RH", "Data":RH, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    Ah = mf.absolutehumidityfromRH(Ta["Data"], RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah)==True, f1, f0)
    attr = {"long_name":"Absolute humidity", "units":"g/m3"}
    variable = {"Label":"Ah", "Data":Ah, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    q = mf.specifichumidityfromRH(RH, Ta["Data"], ps["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(q)==True, f1, f0)
    attr = {"long_name":"Specific humidity", "units":"kg/kg"}
    variable = {"Label":"q", "Data":q, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    # get U and V components from wind speed and direction
    Ws = qcutils.GetVariable(ds, "Ws")
    Wd = qcutils.GetVariable(ds, "Wd")
    U, V = qcutils.convert_WSWDtoUV(Ws, Wd)
    qcutils.CreateVariable(ds, U)
    qcutils.CreateVariable(ds, V)
    # add the time variable
    qcutils.get_nctime_from_datetime(ds)
    # return the data
    return ds
# Example 17
def compare_eddypro():
    """
    Purpose:
     Compare fluxes (u*, Fh, Fe, Fc) from an EddyPro full output file with
     those from an OzFluxQC L3 netCDF file over their overlap period and
     plot each pair against the other with a regression.
    Side effects:
     Opens two file-selection dialogs and displays a 2x2 plot window.
    """
    # ask the user for the two files to compare
    epname = qcio.get_filename_dialog(
        title='Choose an EddyPro full output file')
    ofname = qcio.get_filename_dialog(title='Choose an L3 output file')
    # read them into data structures
    ds_ep = qcio.read_eddypro_full(epname)
    ds_of = qcio.nc_read_series(ofname)
    # local pointers to the Python datetime series
    dt_ep = ds_ep.series['DateTime']['Data']
    dt_of = ds_of.series['DateTime']['Data']
    # overlap period is the latest start to the earliest end
    start_datetime = max([dt_ep[0], dt_of[0]])
    end_datetime = min([dt_ep[-1], dt_of[-1]])
    # indices of the overlap period in each data structure
    si_of = qcutils.GetDateIndex(dt_of,
                                 str(start_datetime),
                                 ts=30,
                                 default=0,
                                 match='exact')
    ei_of = qcutils.GetDateIndex(dt_of,
                                 str(end_datetime),
                                 ts=30,
                                 default=len(dt_of),
                                 match='exact')
    si_ep = qcutils.GetDateIndex(dt_ep,
                                 str(start_datetime),
                                 ts=30,
                                 default=0,
                                 match='exact')
    ei_ep = qcutils.GetDateIndex(dt_ep,
                                 str(end_datetime),
                                 ts=30,
                                 default=len(dt_ep),
                                 match='exact')
    # get the variables to compare from both data structures
    labels = ['ustar', 'Fh', 'Fe', 'Fc']
    of_vars = {}
    ep_vars = {}
    for label in labels:
        of_vars[label] = qcutils.GetVariable(ds_of, label, start=si_of, end=ei_of)
        ep_vars[label] = qcutils.GetVariable(ds_ep, label, start=si_ep, end=ei_ep)
    # copy the range check values from the OFQC attributes to the EP attributes
    # NOTE(review): attribute keys here are lower case ("rangecheck_*") while
    # the attributes written elsewhere use "RangeCheck_*" - confirm which
    # spelling ApplyRangeCheckToVariable() expects
    for label in labels:
        for item in ["rangecheck_upper", "rangecheck_lower"]:
            if item in of_vars[label]["Attr"]:
                ep_vars[label]["Attr"][item] = of_vars[label]["Attr"][item]
    # apply QC to the EddyPro data
    for label in ['ustar', 'Fc', 'Fe', 'Fh']:
        qcck.ApplyRangeCheckToVariable(ep_vars[label])
    # plot the comparison, one panel per flux
    plt.ion()
    fig = plt.figure(1, figsize=(8, 8))
    panels = [('ustar', 1, 'u*', 'm/s'),
              ('Fh', 2, 'Fh', 'W/m2'),
              ('Fe', 3, 'Fe', 'W/m2'),
              ('Fc', 4, 'Fc', 'umol/m2/s')]
    for label, panel, symbol, units in panels:
        qcplot.xyplot(ep_vars[label]["Data"],
                      of_vars[label]["Data"],
                      sub=[2, 2, panel],
                      regr=2,
                      xlabel=symbol + '_EP (' + units + ')',
                      ylabel=symbol + '_OF (' + units + ')')
    plt.tight_layout()
    plt.draw()
    plt.ioff()