def interpolate_to_30minutes(ds_60minutes):
    """
    Purpose:
     Create a 30 minute data structure from a 60 minute data structure.
     The local and UTC datetime series are regenerated at 30 minute spacing
     and every series other than the date and time series is linearly
     interpolated onto the new time step.
    Usage:
     ds_30minutes = interpolate_to_30minutes(ds_60minutes)
    Side effects:
     Any 30 minute value that is interpolated from a 60 minute value equal
     to c.missing_value is set to c.missing_value and given a QC flag of 1.
    """
    ds_30minutes = qcio.DataStructure()
    # carry the global attributes across and then override the time step
    for key in ds_60minutes.globalattributes.keys():
        ds_30minutes.globalattributes[key] = ds_60minutes.globalattributes[key]
    ds_30minutes.globalattributes["time_step"] = 30
    # local and UTC datetime series at 30 minute spacing
    half_hour = datetime.timedelta(minutes=30)
    ldt_60 = ds_60minutes.series["DateTime"]["Data"]
    ldt_30 = list(perdelta(ldt_60[0], ldt_60[-1], half_hour))
    udt_60 = ds_60minutes.series["DateTime_UTC"]["Data"]
    udt_30 = list(perdelta(udt_60[0], udt_60[-1], half_hour))
    # the number of records is taken from the local datetime series
    ds_30minutes.globalattributes['nc_nrecs'] = len(ldt_30)
    ds_30minutes.series["DateTime"] = {}
    ds_30minutes.series["DateTime"]["Data"] = ldt_30
    ds_30minutes.series["DateTime"]["Flag"] = numpy.zeros(len(ldt_30), dtype=numpy.int32)
    ds_30minutes.series["DateTime_UTC"] = {}
    ds_30minutes.series["DateTime_UTC"]["Data"] = udt_30
    ds_30minutes.series["DateTime_UTC"]["Flag"] = numpy.zeros(len(udt_30), dtype=numpy.int32)
    # derive the Excel datetime and the year, month etc from the datetime
    qcutils.get_xldatefromdatetime(ds_30minutes)
    qcutils.get_ymdhmsfromdatetime(ds_30minutes)
    # record numbers are used as the interpolation coordinate, the 30 minute
    # points sit at every half record of the 60 minute series
    nRecs_60 = len(ds_60minutes.series["DateTime"]["Data"])
    nRecs_30 = len(ds_30minutes.series["DateTime"]["Data"])
    x_60 = numpy.arange(0, nRecs_60, 1)
    x_30 = numpy.arange(0, nRecs_60-0.5, 0.5)
    # the date and time variables have already been dealt with
    already_done = ["DateTime", "DateTime_UTC", "xlDateTime", "Year", "Month", "Day",
                    "Hour", "Minute", "Second", "Hdh", "Hr_UTC"]
    labels = [label for label in ds_60minutes.series.keys() if label not in already_done]
    # now do the interpolation (its OK to interpolate accumulated precipitation)
    for label in labels:
        data_60, _, attr = qcutils.GetSeries(ds_60minutes, label)
        # indicator series, 1 where the 60 minute data is missing and 0 elsewhere
        missing_60 = numpy.zeros(len(data_60))
        missing_60[numpy.where(abs(data_60-float(c.missing_value)) < c.eps)[0]] = float(1)
        data_30 = interp1d(x_60, data_60)(x_30)
        # interpolating the indicator gives a non-zero value at every 30 minute
        # point that was influenced by a missing 60 minute value
        missing_30 = interp1d(x_60, missing_60)(x_30)
        bad = numpy.where(abs(missing_30-float(0)) > c.eps)[0]
        data_30[bad] = numpy.float64(c.missing_value)
        flag_30 = numpy.zeros(nRecs_30, dtype=numpy.int32)
        flag_30[bad] = numpy.int32(1)
        qcutils.CreateSeries(ds_30minutes, label, data_30, Flag=flag_30, Attr=attr)
    # get the UTC hour as a decimal number
    hr_utc = [float(t.hour)+float(t.minute)/60 for t in udt_30]
    hr_attr = qcutils.MakeAttributeDictionary(long_name='UTC hour')
    hr_flag = numpy.zeros(nRecs_30, dtype=numpy.int32)
    qcutils.CreateSeries(ds_30minutes, 'Hr_UTC', hr_utc, Flag=hr_flag, Attr=hr_attr)
    return ds_30minutes
Example #2
0
def interpolate_ds(ds_in, ts):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
     The output datetime series is regular, has a time step of ts minutes and
     lies within the first and last datetimes of the input data structure.
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
     where ds_in is the input data structure
           ts is the output time step in minutes
           ds_out is the output data structure
    Side effects:
     The dew point temperature in ds_out is limited so that it never
     exceeds the air temperature.
    Author: PRI
    Date: June 2017
    """
    logger.info("Interpolating data")
    # the output data structure starts with a copy of the input global attributes
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    ds_out.globalattributes["time_step"] = str(ts)
    # round the first and last datetimes to the time step and then pull them
    # back inside the period covered by the input data if needed
    dt = ds_in.series["DateTime"]["Data"]
    first = qcutils.rounddttots(dt[0], ts=ts)
    if first < dt[0]:
        first = first + datetime.timedelta(minutes=ts)
    last = qcutils.rounddttots(dt[-1], ts=ts)
    if last > dt[-1]:
        last = last - datetime.timedelta(minutes=ts)
    idt = list(qcutils.perdelta(first, last, datetime.timedelta(minutes=ts)))
    # input (x1) and output (x2) times as numbers for use by the interpolation
    x1 = numpy.array([toTimestamp(t) for t in dt])
    x2 = numpy.array([toTimestamp(t) for t in idt])
    # write the output datetime series and the time variable
    ldt_var = {"Label": "DateTime",
               "Data": idt,
               "Flag": numpy.zeros(len(idt), dtype=numpy.int32),
               "Attr": {"long_name": "Datetime", "units": "none"}}
    qcutils.CreateVariable(ds_out, ldt_var)
    qcutils.get_nctime_from_datetime(ds_out)
    nrecs = len(idt)
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # first, we do the temperatures, surface pressure and the humidities
    f0 = numpy.zeros(nrecs, dtype=numpy.int32)
    f1 = numpy.ones(nrecs, dtype=numpy.int32)
    for label in ["Ta", "Td", "ps", "RH", "Ah", "q"]:
        var_out = qcutils.create_empty_variable(label, nrecs, datetime=idt)
        var_in = qcutils.GetVariable(ds_in, label)
        var_out["Data"] = interpolate_1d(x1, var_in["Data"], x2)
        # flag is 1 where the interpolated data is masked, 0 otherwise
        is_masked = numpy.ma.getmaskarray(var_out["Data"])
        var_out["Flag"] = numpy.where(is_masked == True, f1, f0)
        var_out["Attr"] = copy.deepcopy(var_in["Attr"])
        qcutils.CreateVariable(ds_out, var_out)
    # now clamp the dew point so that Td <= Ta
    Ta = qcutils.GetVariable(ds_out, "Ta")
    Td = qcutils.GetVariable(ds_out, "Td")
    Td["Data"] = numpy.ma.where(Td["Data"] <= Ta["Data"], Td["Data"], Ta["Data"])
    qcutils.CreateVariable(ds_out, Td)
    # wind speed and direction are handled by a separate routine
    interpolate_wswd(ds_in, x1, ds_out, x2)
    # and so is precipitation
    interpolate_precip(ds_in, x1, ds_out, x2)

    return ds_out
Example #3
0
def l1qc(cf):
    """
    Purpose:
     Read the input file named in the control file (CSV or Excel) and do
     the L1 processing on the data it contains.
    Usage:
     ds1 = l1qc(cf)
     where cf is a control file object
           ds1 is a data structure
    Side effects:
     If the input file is not found, or the file reader reports a non-zero
     return code, the data structure is returned immediately and
     ds1.returncodes["value"] is not 0.
    """
    in_filename = qcio.get_infilenamefromcf(cf)
    # give up straight away if the input file does not exist
    if not qcutils.file_exists(in_filename, mode="quiet"):
        msg = " Input file " + in_filename + " not found ..."
        logger.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    # the file extension decides which reader is used
    is_csv = "csv" in os.path.splitext(in_filename)[1].lower()
    read_series = qcio.csv_read_series if is_csv else qcio.xl_read_series
    ds1 = read_series(cf)
    if ds1.returncodes["value"] != 0:
        return ds1
    if is_csv:
        # CSV files give Python datetimes, get the Excel datetime from them
        qcutils.get_xldatefromdatetime(ds1)
    else:
        # Excel files give Excel datetimes, get the Python datetime from them
        qcutils.get_datetimefromxldate(ds1)
    # get the netCDF attributes from the control file
    qcts.do_attributes(cf, ds1)
    # round the Python datetime to the nearest second
    qcutils.round_datetime(ds1, mode="nearest_second")
    # check the time step of the Python datetime series and fix it if the
    # check asks for that
    fixtimestepmethod = qcutils.get_keyvaluefromcf(cf, ["options"],
                                                   "FixTimeStepMethod",
                                                   default="round")
    if qcutils.CheckTimeStep(ds1):
        qcutils.FixTimeStep(ds1, fixtimestepmethod=fixtimestepmethod)
    # the datetime may have changed so recalculate the Excel datetime and
    # the Year, Month, Day etc
    qcutils.get_xldatefromdatetime(ds1)
    qcutils.get_ymdhmsfromdatetime(ds1)
    # write the processing level to a global attribute
    ds1.globalattributes['nc_level'] = str("L1")
    # the start and end dates come from the first and last datetimes unless
    # they are already present in the global attributes
    for attr_name, position in (('start_date', 0), ('end_date', -1)):
        if attr_name not in ds1.globalattributes.keys():
            ds1.globalattributes[attr_name] = str(
                ds1.series['DateTime']['Data'][position])
    # calculate variances from standard deviations and vice versa
    qcts.CalculateStandardDeviations(cf, ds1)
    # create new variables using user defined functions
    qcts.DoFunctions(cf, ds1)
    # create a series of synthetic downwelling shortwave radiation
    qcts.get_synthetic_fsd(ds1)
    # check missing data and QC flags are consistent
    qcutils.CheckQCFlags(ds1)

    return ds1
Example #4
0
def l1qc_read_files(cf):
    """
    Purpose:
     Read the input file named in the control file (CSV or Excel) into a
     data structure and make sure it has both the Python and the Excel
     datetime series.
    Usage:
     ds1 = l1qc_read_files(cf)
     where cf is a control file object
           ds1 is a data structure
    Side effects:
     If the input file is not found, or the file reader reports a non-zero
     return code, the data structure is returned immediately and
     ds1.returncodes["value"] is not 0.
    """
    in_filename = qcio.get_infilenamefromcf(cf)
    # give up straight away if the input file does not exist
    if not qcutils.file_exists(in_filename, mode="quiet"):
        msg = " Input file " + in_filename + " not found ..."
        log.error(msg)
        ds1 = qcio.DataStructure()
        ds1.returncodes = {"value": 1, "message": msg}
        return ds1
    # the file extension decides which reader is used
    is_csv = "csv" in os.path.splitext(in_filename)[1].lower()
    read_series = qcio.csv_read_series if is_csv else qcio.xl_read_series
    ds1 = read_series(cf)
    if ds1.returncodes["value"] != 0:
        return ds1
    if is_csv:
        # CSV files give Python datetimes, get the Excel datetime from them
        qcutils.get_xldatefromdatetime(ds1)
    else:
        # Excel files give Excel datetimes, get the Python datetime from them
        qcutils.get_datetimefromxldate(ds1)
    return ds1
Example #5
0
                ax1.plot(modis_dt, evi_qc[:, i, j], 'r+')
        ax1.errorbar(modis_dt, evi_mean, yerr=evi_sd, fmt='ro')
        ax1.plot(modis_dt, evi_interp, 'g^')
        ax1.plot(modis_dt, evi_interp, 'g--')
        ax1.plot(modis_dt, evi_interp_smooth, 'y-')
        #for item in fire_dates: plt.axvline(item)
        ax2 = plt.subplot(212, sharex=ax1)
        ax2.errorbar(modis_dt, evi_mean, yerr=evi_sd, fmt='ro')
        ax2.plot(dt_UTC, evi_interp2_smooth, 'b-')
        png_filename = out_name.replace(".nc", ".png")
        fig.savefig(png_filename, format="png")
        plt.draw()
        plt.ioff()

    # create a data structure and write the global attributes
    ds = qcio.DataStructure()
    ds.series["DateTime"] = {}
    ds.globalattributes["site_name"] = site_name
    ds.globalattributes["time_zone"] = site_timezone
    ds.globalattributes["longitude"] = site_longitude
    ds.globalattributes["latitude"] = site_latitude
    ds.globalattributes["time_step"] = site_timestep
    ds.globalattributes["xl_datemode"] = str(0)
    ds.globalattributes["nc_level"] = "L1"
    # convert from UTC to local time
    site_tz = pytz.timezone(site_timezone)
    # put the time zone (UTC) into the datetime
    dt_utc = [x.replace(tzinfo=pytz.utc) for x in dt_UTC]
    # convert from UTC to local time
    dt_loc = [x.astimezone(site_tz) for x in dt_utc]
    # remove any daylight saving adjustments (towers run on standard time)
Example #6
0
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a data structure.
     Wind direction, wind speed, air temperature, dew point temperature,
     pressure and precipitation are decoded from fixed column positions in
     each record.  Relative, absolute and specific humidity and the wind
     components are then calculated and added to the data structure.
    Assumptions:
     The file name starts with 2 "-" separated fields that together form the
     ISD site identifier.
     The site altitude, latitude and longitude are taken from the first line.
    Usage:
     ds = read_isd_file(isd_file_path)
     where isd_file_path is the path of the ISD file
           ds is a data structure
    Side effects:
     Fields that can not be decoded as numbers are treated as missing data.
     The last line of the file is not decoded.
    Author: PRI
    Date: June 2017
    """
    def decode_field(line, start, end, scale, missing):
        # return the number in line[start:end] divided by scale or the missing
        # data code if the field is not numeric, only ValueError is trapped so
        # that unrelated errors (and keyboard interrupts) are not hidden
        try:
            return float(line[start:end])/float(scale)
        except ValueError:
            return float(missing)

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file "+isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0]+"-"+isd_site_id[1]
    # read the file
    # NOTE(review): gzip files opened with 'rb' give bytes under Python 3 and
    # the observation code test below compares against str - confirm which
    # Python version this is expected to run on
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34])/float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41])/float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the dictionary that holds the decoded data
    isd = {}
    isd["DateTime"] = {"Data":[],"Flag":[],"Attr":{"long_name":"Datetime","units":"none"}}
    isd["Wd"] = {"Data":[],"Attr":{"long_name":"Wind direction","units":"degrees","missing_value":999}}
    isd["Ws"] = {"Data":[],"Attr":{"long_name":"Wind speed","units":"m/s","missing_value":999.9}}
    isd["Ta"] = {"Data":[],"Attr":{"long_name":"Air temperature","units":"C","missing_value":999.9}}
    isd["Td"] = {"Data":[],"Attr":{"long_name":"Dew point temperature","units":"C","missing_value":999.9}}
    isd["ps"] = {"Data":[],"Attr":{"long_name":"Surface pressure","units":"kPa","missing_value":9999.9}}
    isd["Precip"] = {"Data":[],"Attr":{"long_name":"Precipitation","units":"mm","missing_value":999.9}}
    # define the codes for good data in the ISD file
    OK_obs_code = ["AUTO ","CRN05","CRN15","FM-12","FM-15","FM-16","SY-MT"]
    # iterate over the lines in the file and decode the data
    # NOTE(review): the last line of the file is never decoded - confirm that
    # this is intended
    for line in content[:-1]:
        # filter out anything other than hourly data
        if line[41:46] not in OK_obs_code:
            continue
        YY = int(line[15:19])
        MM = int(line[19:21])
        DD = int(line[21:23])
        HH = int(line[23:25])
        mm = int(line[25:27])
        dt = datetime.datetime(YY,MM,DD,HH,mm,0)
        #isd["DateTime"]["Data"].append(pytz.utc.localize(dt))
        isd["DateTime"]["Data"].append(dt)
        # wind direction, degT
        isd["Wd"]["Data"].append(decode_field(line, 60, 63, 1, 999))
        # wind speed, m/s
        isd["Ws"]["Data"].append(decode_field(line, 65, 69, 10, 999.9))
        # air temperature, C
        isd["Ta"]["Data"].append(decode_field(line, 87, 92, 10, 999.9))
        # dew point temperature, C
        isd["Td"]["Data"].append(decode_field(line, 93, 98, 10, 999.9))
        # sea level pressure, hPa
        isd["ps"]["Data"].append(decode_field(line, 99, 104, 10, 9999.9))
        # precipitation, mm, only present when the "AA1" identifier is found
        if line[108:111] == "AA1":
            isd["Precip"]["Data"].append(decode_field(line, 113, 117, 10, 999.9))
        else:
            isd["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    isd["DateTime"]["Attr"]["time_zone"] = "UTC"
    # get the number of records and add this to the global attributes
    nrecs = len(isd["DateTime"]["Data"])
    ds.globalattributes["nc_nrecs"] = str(nrecs)
    # define the QC flags
    f0 = numpy.zeros(len(isd["DateTime"]["Data"]))
    f1 = numpy.ones(len(isd["DateTime"]["Data"]))
    # deal with the datetime first
    variable = {"Label":"DateTime", "Data":numpy.array(isd["DateTime"]["Data"]),
                "Flag":f0, "Attr":isd["DateTime"]["Attr"]}
    qcutils.CreateVariable(ds, variable)
    # get the nominal time step
    dt_delta = qcutils.get_timestep(ds)
    # scipy.stats.mode() returns an array for 1D input in older versions of
    # SciPy and a scalar in newer versions, atleast_1d() copes with both
    ts = numpy.atleast_1d(scipy.stats.mode(dt_delta)[0])[0]/60
    ds.globalattributes["time_step"] = ts
    # add the variables to the data structure
    logger.info("Writing data to the data structure")
    labels = [label for label in isd.keys() if label != "DateTime"]
    for label in labels:
        data = numpy.ma.masked_equal(isd[label]["Data"], isd[label]["Attr"]["missing_value"])
        flag = numpy.where(numpy.ma.getmaskarray(data) == True, f1, f0)
        attr = isd[label]["Attr"]
        variable = {"Label":label, "Data":data, "Flag":flag, "Attr":attr}
        qcutils.CreateVariable(ds, variable)
    # hPa to kPa
    ps = qcutils.GetVariable(ds, "ps")
    ps["Data"] = ps["Data"]/float(10)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    Ta = qcutils.GetVariable(ds, "Ta")
    cfac = numpy.ma.exp((-1*site_altitude)/((Ta["Data"]+273.15)*29.263))
    ps["Data"] = ps["Data"]*cfac
    ps["Attr"]["long_name"] = ps["Attr"]["long_name"]+", adjusted from sea level to station"
    qcutils.CreateVariable(ds, ps)
    # do precipitation and apply crude limits
    Precip = qcutils.GetVariable(ds, "Precip")
    condition = (Precip["Data"]<0)|(Precip["Data"]>100)
    Precip["Data"] = numpy.ma.masked_where(condition, Precip["Data"])
    Precip["Flag"] = numpy.where(numpy.ma.getmaskarray(Precip["Data"])==True, f1, f0)
    Precip["Attr"]["RangeCheck_upper"] = 100
    Precip["Attr"]["RangeCheck_lower"] = 0
    qcutils.CreateVariable(ds, Precip)
    # get the humidities from Td
    Ta = qcutils.GetVariable(ds, "Ta")
    Td = qcutils.GetVariable(ds, "Td")
    ps = qcutils.GetVariable(ds, "ps")
    RH = mf.RHfromdewpoint(Td["Data"], Ta["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(RH)==True, f1, f0)
    attr = {"long_name":"Relative humidity", "units":"%"}
    variable = {"Label":"RH", "Data":RH, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    Ah = mf.absolutehumidityfromRH(Ta["Data"], RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah)==True, f1, f0)
    attr = {"long_name":"Absolute humidity", "units":"g/m3"}
    variable = {"Label":"Ah", "Data":Ah, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    q = mf.specifichumidityfromRH(RH, Ta["Data"], ps["Data"])
    flag = numpy.where(numpy.ma.getmaskarray(q)==True, f1, f0)
    attr = {"long_name":"Specific humidity", "units":"kg/kg"}
    variable = {"Label":"q", "Data":q, "Flag":flag, "Attr":attr}
    qcutils.CreateVariable(ds, variable)
    # get U and V components from wind speed and direction
    Ws = qcutils.GetVariable(ds, "Ws")
    Wd = qcutils.GetVariable(ds, "Wd")
    U, V = qcutils.convert_WSWDtoUV(Ws, Wd)
    qcutils.CreateVariable(ds, U)
    qcutils.CreateVariable(ds, V)
    # add the time variable
    qcutils.get_nctime_from_datetime(ds)
    # return the data
    return ds
Example #7
0
def average_duplicate_times(ds_in, time_step):
    """
    Purpose:
     Remove duplicate time steps by averaging data with the same time stamp.
     The routine uses scipy.stats.binned_statistic() to bin the data based
     on the time.  The bins have a width of time_step minutes and the bin
     edges lie on integral multiples of time_step.
     Variables with a label starting with "P" and units of "m" or "mm" are
     summed, all other variables are averaged.
    Usage:
     ds_out = average_duplicate_times(ds_in, time_step=30)
     where ds_in is the input data structure
           time_step is the output time step in minutes
           ds_out is the output data structure
    Side effects:
     The time given for the averages and sums is the end of the time period.
     Wind speed and direction are recalculated from the binned "u" and "v"
     variables.
    Author: PRI
    Date: October 2017
    """
    logger.info("Getting data onto a regular time step")
    # get the time as a number (see attr["units"] for units)
    time_var = qcutils.GetVariable(ds_in, "time")
    # generate an array of bin edges for use by binned_statistic()
    bin_width = time_step*60
    # round the ISD start time to an integral of the time step
    t0 = time_step*60*int(time_var["Data"].data[0]/(time_step*60))
    bin_first = t0 - bin_width
    # round the ISD end time to an integral of the time step
    t1 = time_step*60*int(time_var["Data"].data[-1]/(time_step*60))
    # make sure we go 1 beyond the end time
    if t1 < time_var["Data"][-1]:
        t1 = t1 + bin_width
    # generate an array of bin edges
    bin_last = t1 + bin_width
    bins = numpy.arange(bin_first, bin_last, bin_width)
    # get the number of records in the output series
    nrecs = len(bins)-1
    # generate series of zeros and ones to be used as QC flags
    f0 = numpy.zeros(nrecs)
    f1 = numpy.ones(nrecs)
    # create an output data structure with a copy of the input global attributes
    ds_out = qcio.DataStructure()
    ds_out.globalattributes = copy.deepcopy(ds_in.globalattributes)
    # update the number of records
    ds_out.globalattributes["nc_nrecs"] = nrecs
    # get a list of variable labels but exclude the datetime and time variables
    # NB: Wind velocity components U and V will be averaged and wind speed and direction calculated
    # from these.
    labels = [label for label in ds_in.series.keys() if label not in ["DateTime", "time"]]
    # loop over variables
    for label in labels:
        # get the variable
        var_in = qcutils.GetVariable(ds_in, label)
        # indices of non-masked elements
        idx = numpy.ma.where(numpy.ma.getmaskarray(var_in["Data"]) == False)[0]
        # check to see if we have at least 1 data point to deal with
        if len(idx) != 0:
            # get the non-masked data as an ndarray
            data_in = numpy.array(var_in["Data"][idx].data)
            time_in = numpy.array(time_var["Data"][idx].data)
            # precipitation is summed, everything else is averaged
            is_precip = var_in["Label"][0:1] == "P" and var_in["Attr"]["units"] in ["m", "mm"]
            statistic = "sum" if is_precip else "mean"
            # use binned_statistic() to combine records that fall in the same bin
            binned = scipy.stats.binned_statistic(time_in, data_in, statistic=statistic, bins=bins)[0]
            # convert output to a masked array and mask the non-finite values
            data_out = numpy.ma.masked_where(numpy.isfinite(binned) == False, numpy.ma.array(binned))
            # generate the QC flag
            flag_out = numpy.where(numpy.ma.getmaskarray(data_out) == True, f1, f0)
            # and create the output variable
            var_out = {"Label":label, "Data":data_out, "Flag":flag_out, "Attr":var_in["Attr"]}
        else:
            # no data, so create an empty output variable
            var_out = {"Label":label, "Data":numpy.ma.masked_all(nrecs), "Flag":f1,
                       "Attr":var_in["Attr"]}
        # and write the output variable to the output data structure
        qcutils.CreateVariable(ds_out, var_out)
    # the time stamp of each bin is its right hand edge (end of the period)
    # NB: the bin edges are taken from the bins array rather than from the
    #     edges returned by binned_statistic() because the latter only exist
    #     if at least 1 variable had some non-masked data
    end_times = numpy.asarray(bins, dtype=numpy.float64)[1:]
    # and convert these to a series of Python datetimes
    attr = copy.deepcopy(ds_in.series["DateTime"]["Attr"])
    ldt_out = {"Label":"DateTime", "Data":netCDF4.num2date(end_times, time_var["Attr"]["units"]),
               "Flag":f0, "Attr":attr}
    # and write the datetime to the output data structure
    qcutils.CreateVariable(ds_out, ldt_out)
    qcutils.get_nctime_from_datetime(ds_out)
    # get wind speed and direction from components
    U = qcutils.GetVariable(ds_out, "u")
    V = qcutils.GetVariable(ds_out, "v")
    WS, WD = qcutils.convert_UVtoWSWD(U, V)
    qcutils.CreateVariable(ds_out, WS)
    qcutils.CreateVariable(ds_out, WD)
    return ds_out
Example #8
0
     # put the data for this site into the all sites data structure
     ds_out[site_index[isd_site]] = copy.deepcopy(ds_mlg)
     # add some useful global attributes
     ds_out[site_index[isd_site]].globalattributes["isd_site_id"] = isd_site
     ds_out[site_index[isd_site]].globalattributes["time_zone"] = time_zone
     # write out a netCDF file for each ISD site and each year
     #nc_file_name = isd_site+"_"+str(year)+".nc"
     #nc_dir_path = os.path.join(out_base_path,site,"Data","ISD")
     #if not os.path.exists(nc_dir_path):
         #os.makedirs(nc_dir_path)
     #nc_file_path = os.path.join(nc_dir_path,nc_file_name)
     #nc_file = qcio.nc_open_write(nc_file_path)
     #qcio.nc_write_series(nc_file, ds_out[site_index[isd_site]], ndims=1)
 # now we merge the data structures for each ISD station into a single data structure
 # first, instance a data structure
 ds_all = qcio.DataStructure()
 ds_all.globalattributes["latitude"] = site_info[site]["Latitude"]
 ds_all.globalattributes["longitude"] = site_info[site]["Longitude"]
 ds_all.globalattributes["altitude"] = site_info[site]["Altitude"]
 # now loop over the data structures for each ISD station and get the earliest
 # start time and the latest end time
 start_datetime = []
 end_datetime = []
 for i in list(ds_out.keys()):
     start_datetime.append(ds_out[i].series["DateTime"]["Data"][0])
     end_datetime.append(ds_out[i].series["DateTime"]["Data"][-1])
 print site, year
 start = min(start_datetime)
 end = max(end_datetime)
 # now make a datetime series at the required time step from the earliest start
 # datetime to the latest end datetime
Example #9
0
                                            "site_sa_limit",
                                            default=5)
 # index of the site in latitude dimension
 site_lat_index = int(((latitude[0] - site_latitude) / lat_resolution) +
                      0.5)
 erai_latitude = latitude[site_lat_index]
 # index of the site in longitude dimension
 if site_longitude < 0: site_longitude = float(360) + site_longitude
 site_lon_index = int((
     (site_longitude - longitude[0]) / lon_resolution) + 0.5)
 erai_longitude = longitude[site_lon_index]
 print "  Site coordinates: ", site_latitude, site_longitude
 print "  ERAI grid: ", latitude[site_lat_index], longitude[
     site_lon_index]
 # get an instance of the Datastructure
 ds_erai = qcio.DataStructure()
 ds_erai.series["DateTime"] = {}
 ds_erai.globalattributes["site_name"] = site_name
 ds_erai.globalattributes["time_zone"] = site_timezone
 ds_erai.globalattributes["latitude"] = site_latitude
 ds_erai.globalattributes["longitude"] = site_longitude
 ds_erai.globalattributes["time_step"] = site_timestep
 ds_erai.globalattributes["sa_limit"] = site_sa_limit
 ds_erai.globalattributes['xl_datemode'] = str(0)
 ds_erai.globalattributes["nc_level"] = "L1"
 # get the UTC and local datetime series
 site_tz = pytz.timezone(site_timezone)
 # now we get the datetime series at the tower time step
 tdts = datetime.timedelta(minutes=site_timestep)
 # get the start and end datetimes rounded to the nearest time steps
 # that lie between the first and last times
# get the name of the control file to use
# NOTE(review): presumably this opens a file chooser that starts in
# ../controlfiles, confirm against qcio.get_controlfilename()
cf_name = qcio.get_controlfilename(path='../controlfiles',title='Choose a control file')
# get the control file contents
logging.info('Reading the control file')
cf = configobj.ConfigObj(cf_name)
# get the site names and the variable names from the control file, these are
# the keys of the [Sites] and [Variables] sections
logging.info('Getting control file contents')
site_list = list(cf["Sites"].keys())
var_list = list(cf["Variables"].keys())
# loop over sites
#site_list = ["AdelaideRiver"]
for site in site_list:
    # get the information for this site from the control file
    info = get_info_dict(cf,site)
    logging.info("Processing site "+info["site_name"])
    # instance the data structure that will hold the 60 minute data
    logging.info('Creating the data structures')
    ds_60minutes = qcio.DataStructure()
    # get a sorted list of files that match the mask in the control file
    file_list = sorted(glob.glob(info["in_filename"]))
    # read the netcdf files, only the variables in var_list are requested
    logging.info('Reading the netCDF files for '+info["site_name"])
    f = access_read_mfiles2(file_list,var_list=var_list)
    # get the data from the netCDF files and write it to the 60 minute data structure
    logging.info('Getting the ACCESS data')
    get_accessdata(cf,ds_60minutes,f,info)
    # set some global attributes
    logging.info('Setting global attributes')
    set_globalattributes(ds_60minutes,info)
    # check the time step of the data and fix it if the check asks for that
    logging.info("Checking for time gaps")
    if qcutils.CheckTimeStep(ds_60minutes):
        qcutils.FixTimeStep(ds_60minutes)
Example #11
0
def read_isd_file(isd_file_path):
    """
    Purpose:
     Reads an ISD CSV file (gz or uncompressed) and returns the data in a data structure.
     Wind direction, wind speed, air temperature, dew point temperature,
     pressure and precipitation are decoded from fixed column positions in
     each record.  Relative, absolute and specific humidity are then
     calculated and added to the data structure.
    Assumptions:
     The file name starts with 2 "-" separated fields that together form the
     ISD site identifier.
     The site altitude, latitude and longitude are taken from the first line.
    Usage:
     ds = read_isd_file(isd_file_path)
     where isd_file_path is the path of the ISD file
           ds is a data structure
    Side effects:
     Fields that can not be decoded as numbers are treated as missing data.
     The last line of the file is not decoded.
     The datetimes in the data structure are time zone aware (UTC).
    Author: PRI
    Date: June 2017
    """
    def decode_field(line, start, end, scale, missing):
        # return the number in line[start:end] divided by scale or the missing
        # data code if the field is not numeric, only ValueError is trapped so
        # that unrelated errors (and keyboard interrupts) are not hidden
        try:
            return float(line[start:end]) / float(scale)
        except ValueError:
            return float(missing)

    isd_file_name = os.path.split(isd_file_path)[1]
    msg = "Reading ISD file " + isd_file_name
    logger.info(msg)
    isd_site_id = isd_file_name.split("-")
    isd_site_id = isd_site_id[0] + "-" + isd_site_id[1]
    # read the file
    # NOTE(review): gzip files opened with 'rb' give bytes under Python 3 and
    # the observation code test below compares against str - confirm which
    # Python version this is expected to run on
    if os.path.splitext(isd_file_path)[1] == ".gz":
        with gzip.open(isd_file_path, 'rb') as fp:
            content = fp.readlines()
    else:
        with open(isd_file_path) as fp:
            content = fp.readlines()
    # get a data structure
    ds = qcio.DataStructure()
    # get the site latitude, longitude and altitude
    ds.globalattributes["altitude"] = float(content[0][46:51])
    ds.globalattributes["latitude"] = float(content[0][28:34]) / float(1000)
    ds.globalattributes["longitude"] = float(content[0][34:41]) / float(1000)
    ds.globalattributes["isd_site_id"] = isd_site_id
    # initialise the series in the data structure, each series gets its own
    # empty data and flag lists and its own attribute dictionary
    series_info = [
        ("DateTime", "Datetime", "none"),
        ("Wd", "Wind direction", "degrees"),
        ("Ws", "Wind speed", "m/s"),
        ("Ta", "Air temperature", "C"),
        ("Td", "Dew point temperature", "C"),
        ("ps", "Surface pressure", "kPa"),
        ("Precip", "Precipitation", "mm"),
    ]
    for label, long_name, units in series_info:
        ds.series[label] = {
            "Data": [],
            "Flag": [],
            "Attr": {
                "long_name": long_name,
                "units": units
            }
        }
    # define the codes for good data in the ISD file
    OK_obs_code = [
        "AUTO ", "CRN05", "CRN15", "FM-12", "FM-15", "FM-16", "SY-MT"
    ]
    # iterate over the lines in the file and decode the data
    # NOTE(review): the last line of the file is never decoded - confirm that
    # this is intended
    for line in content[:-1]:
        # filter out anything other than hourly data
        if line[41:46] not in OK_obs_code:
            continue
        YY = int(line[15:19])
        MM = int(line[19:21])
        DD = int(line[21:23])
        HH = int(line[23:25])
        mm = int(line[25:27])
        dt = datetime.datetime(YY, MM, DD, HH, mm, 0)
        ds.series["DateTime"]["Data"].append(pytz.utc.localize(dt))
        # wind direction, degT
        ds.series["Wd"]["Data"].append(decode_field(line, 60, 63, 1, 999))
        # wind speed, m/s
        ds.series["Ws"]["Data"].append(decode_field(line, 65, 69, 10, 999.9))
        # air temperature, C
        ds.series["Ta"]["Data"].append(decode_field(line, 87, 92, 10, 999.9))
        # dew point temperature, C
        ds.series["Td"]["Data"].append(decode_field(line, 93, 98, 10, 999.9))
        # sea level pressure, hPa
        ds.series["ps"]["Data"].append(decode_field(line, 99, 104, 10, 9999.9))
        # precipitation, mm, only present when the "AA1" identifier is found
        if line[108:111] == "AA1":
            ds.series["Precip"]["Data"].append(
                decode_field(line, 113, 117, 10, 999.9))
        else:
            ds.series["Precip"]["Data"].append(float(999.9))
    # add the time zone to the DateTime attributes
    ds.series["DateTime"]["Attr"]["time_zone"] = "UTC"
    # series of zeros and ones to be used as QC flags
    f0 = numpy.zeros(len(ds.series["DateTime"]["Data"]))
    f1 = numpy.ones(len(ds.series["DateTime"]["Data"]))
    # convert the datetime from a list to an array
    ds.series["DateTime"]["Data"] = numpy.array(ds.series["DateTime"]["Data"])
    ds.series["DateTime"]["Flag"] = f0
    ds.globalattributes["nc_nrecs"] = len(f0)
    # get the nominal time step
    dt_delta = qcutils.get_timestep(ds)
    # scipy.stats.mode() returns an array for 1D input in older versions of
    # SciPy and a scalar in newer versions, atleast_1d() copes with both
    ts = numpy.atleast_1d(scipy.stats.mode(dt_delta)[0])[0] / 60
    ds.globalattributes["time_step"] = ts
    # convert from lists to masked arrays, the missing data code is masked
    # and the QC flag is set to 1 where the data is masked
    for label, missing in [("Wd", 999), ("Ws", 999.9), ("Ta", 999.9), ("Td", 999.9)]:
        data = numpy.ma.masked_equal(ds.series[label]["Data"], missing)
        ds.series[label]["Data"] = data
        ds.series[label]["Flag"] = numpy.where(
            numpy.ma.getmaskarray(data) == True, f1, f0)
    # hPa to kPa
    ds.series["ps"]["Data"] = numpy.ma.masked_equal(ds.series["ps"]["Data"],
                                                    9999.9) / float(10)
    ds.series["ps"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["ps"]["Data"]) == True, f1, f0)
    # convert sea level pressure to station pressure
    site_altitude = float(ds.globalattributes["altitude"])
    cfac = numpy.ma.exp(
        (-1 * site_altitude) / ((ds.series["Ta"]["Data"] + 273.15) * 29.263))
    ds.series["ps"]["Data"] = ds.series["ps"]["Data"] * cfac
    # do precipitation and apply crude limits
    ds.series["Precip"]["Data"] = numpy.ma.masked_equal(
        ds.series["Precip"]["Data"], 999.9)
    condition = (ds.series["Precip"]["Data"] <
                 0) | (ds.series["Precip"]["Data"] > 100)
    ds.series["Precip"]["Data"] = numpy.ma.masked_where(
        condition, ds.series["Precip"]["Data"])
    ds.series["Precip"]["Flag"] = numpy.where(
        numpy.ma.getmaskarray(ds.series["Precip"]["Data"]) == True, f1, f0)
    # get the humidities from Td
    Ta, flag, attr = qcutils.GetSeriesasMA(ds, "Ta")
    Td, flag, attr = qcutils.GetSeriesasMA(ds, "Td")
    ps, flag, attr = qcutils.GetSeriesasMA(ds, "ps")
    RH = mf.RHfromdewpoint(Td, Ta)
    flag = numpy.where(numpy.ma.getmaskarray(RH) == True, f1, f0)
    attr = {"long_name": "Relative humidity", "units": "%"}
    qcutils.CreateSeries(ds, "RH", RH, Flag=flag, Attr=attr)
    Ah = mf.absolutehumidityfromRH(Ta, RH)
    flag = numpy.where(numpy.ma.getmaskarray(Ah) == True, f1, f0)
    attr = {"long_name": "Absolute humidity", "units": "g/m3"}
    qcutils.CreateSeries(ds, "Ah", Ah, Flag=flag, Attr=attr)
    q = mf.specifichumidityfromRH(RH, Ta, ps)
    flag = numpy.where(numpy.ma.getmaskarray(q) == True, f1, f0)
    attr = {"long_name": "Specific humidity", "units": "kg/kg"}
    qcutils.CreateSeries(ds, "q", q, Flag=flag, Attr=attr)
    # return the data
    return ds
Example #12
0
def interpolate_ds(ds_in, ts, k=3):
    """
    Purpose:
     Interpolate the contents of a data structure onto a different time step.
     A new data structure is built on a regular time axis with a spacing of
     "ts" minutes.  Series whose label contains "Precip" are not interpolated:
     each input value is copied to the output record with exactly the same
     time stamp and every other output record is left at zero.  All other
     series are passed through interpolate_1d().
    Arguments:
     ds_in - input data structure, must contain a "DateTime" series
     ts    - time step of the output data structure in minutes
     k     - not used by this routine, kept so existing calls keep working
    Returns:
     ds_out - a new data structure holding the series at the new time step
    Assumptions:
     The output time axis is generated by perdelta() between two whole hours;
     the start is the first input time stamp less 30 minutes and the end is
     the last input time stamp plus 30 minutes, both with the minutes and
     seconds set to zero.
     All output flags are set to zero, the input flags are not carried over.
    Usage:
     ds_out = interpolate_ds(ds_in, ts)
    Author: PRI
    Date: June 2017
    """
    # instance the output data structure
    ds_out = qcio.DataStructure()
    # copy the global attributes
    for key in ds_in.globalattributes.keys():
        ds_out.globalattributes[key] = ds_in.globalattributes[key]
    # add the time step
    ds_out.globalattributes["time_step"] = str(ts)
    # generate a regular time series at the required time step
    dt = ds_in.series["DateTime"]["Data"]
    half_hour = datetime.timedelta(minutes=30)
    dt0 = dt[0] - half_hour
    start = datetime.datetime(dt0.year, dt0.month, dt0.day, dt0.hour, 0, 0)
    dt1 = dt[-1] + half_hour
    end = datetime.datetime(dt1.year, dt1.month, dt1.day, dt1.hour, 0, 0)
    idt = list(perdelta(start, end, datetime.timedelta(minutes=ts)))
    nrecs_out = len(idt)
    # x1 is the input time axis and x2 the output time axis, both as timestamps
    x1 = numpy.array([toTimestamp(this_dt) for this_dt in dt])
    x2 = numpy.array([toTimestamp(this_dt) for this_dt in idt])
    # put the new datetime series into the output data structure
    ds_out.series["DateTime"] = {}
    ds_out.series["DateTime"]["Data"] = idt
    # flags are integers everywhere else in this file
    ds_out.series["DateTime"]["Flag"] = numpy.zeros(nrecs_out,
                                                    dtype=numpy.int32)
    ds_out.series["DateTime"]["Attr"] = {
        "long_name": "Datetime",
        "units": "none"
    }
    ds_out.globalattributes["nc_nrecs"] = nrecs_out
    # loop over the series in the data structure and interpolate
    series_list = list(ds_in.series.keys())
    if "DateTime" in series_list:
        series_list.remove("DateTime")
    for label in series_list:
        data_in, flag_in, attr_in = qcutils.GetSeriesasMA(ds_in, label)
        # check if we are dealing with precipitation
        if "Precip" in label:
            # precipitation shouldn't be interpolated, just assign any precipitation
            # to the ISD time stamp.
            data_out = numpy.ma.zeros(nrecs_out, dtype=numpy.float64)
            # only input records whose time stamp is on the output time axis
            # can be assigned; selecting them explicitly keeps the index and
            # the data the same length when some input time stamps are not
            # on the output time axis
            on_grid = numpy.isin(x1, x2)
            idx = numpy.searchsorted(x2, x1[on_grid])
            data_out[idx] = data_in[on_grid]
        else:
            # interpolate everything else
            data_out = interpolate_1d(x1, data_in, x2)
        flag_out = numpy.zeros(nrecs_out, dtype=numpy.int32)
        attr_out = attr_in
        qcutils.CreateSeries(ds_out,
                             label,
                             data_out,
                             Flag=flag_out,
                             Attr=attr_out)

    return ds_out
# take the 30 minute time stamps between si_wholehour and ei_wholehour
# (inclusive) and pair them up, 2 half hours per row
dt_aws_30minute_array = numpy.array(
    dt_aws_30minute[si_wholehour:ei_wholehour + 1])
nRecs_30minute = len(dt_aws_30minute_array)
# floor division keeps the number of rows an integer; "/" gives a float on
# Python 3 and numpy.reshape does not accept a float in the shape
dt_aws_2d = numpy.reshape(dt_aws_30minute_array, (nRecs_30minute // 2, 2))
# the second time stamp of each pair becomes the 60 minute time stamp
dt_aws_60minute = list(dt_aws_2d[:, 1])
nRecs_60minute = len(dt_aws_60minute)

# list the series in the 30 minute data structure and strip out the
# date and time variables
series_list = list(ds_aws_30minute.series.keys())
for item in [
        "DateTime", "Ddd", "Day", "Minute", "xlDateTime", "Hour", "time",
        "Month", "Second", "Year"
]:
    if item in series_list:
        series_list.remove(item)

# get the 60 minute data structure; this is done once, after the loop above,
# instead of being repeated for every entry removed from series_list
ds_aws_60minute = qcio.DataStructure()
# get the global attributes
for item in list(ds_aws_30minute.globalattributes.keys()):
    ds_aws_60minute.globalattributes[
        item] = ds_aws_30minute.globalattributes[item]
# overwrite with 60 minute values as appropriate
ds_aws_60minute.globalattributes["nc_nrecs"] = str(nRecs_60minute)
ds_aws_60minute.globalattributes["time_step"] = str(60)
# put the Python datetime into the data structure
ds_aws_60minute.series["DateTime"] = {}
ds_aws_60minute.series["DateTime"]["Data"] = dt_aws_60minute
ds_aws_60minute.series["DateTime"]["Flag"] = numpy.zeros(nRecs_60minute,
                                                         dtype=numpy.int32)
ds_aws_60minute.series["DateTime"][
    "Attr"] = qcutils.MakeAttributeDictionary(
        long_name="DateTime in local time zone", units="None")
Example #14
0
# get the control file
cf = qcio.load_controlfile(path='../controlfiles')
if len(cf)==0: sys.exit()
start_date = cf["General"]["start_date"]
end_date = cf["General"]["end_date"]
var_list = cf["Variables"].keys()
site_list = cf["Sites"].keys()
for site in site_list:
    # get the input file mask
    infilename = cf["Sites"][site]["in_filepath"]+cf["Sites"][site]["in_filename"]
    if not os.path.isfile(infilename):
        log.error("netCDF file "+infilename+" not found, skipping ...")
        continue
    log.info("Starting site: "+site)
    # get a data structure
    ds_30 = qcio.DataStructure()
    # get the output file name
    outfilename = cf["Sites"][site]["out_filepath"]+cf["Sites"][site]["out_filename"]
    # average to 30 minutes or not
    average = True
    if not cf["Sites"][site].as_bool("average"): average = False
    # get the site time zone
    site_timezone = cf["Sites"][site]["site_timezone"]
    # read the BIOS file
    bios_ncfile = netCDF4.Dataset(infilename)
    time = bios_ncfile.variables["time"][:]
    nRecs = len(time)
    # set some global attributes
    ts = ds_30.globalattributes["time_step"] = 30
    ds_30.globalattributes["time_zone"] = site_timezone
    ds_30.globalattributes["nc_nrecs"] = nRecs