Example #1
def blh_estimation(inputFile, outputFile=None, storeInNetcdf=True, params=None):
    """Perform BLH estimation on all profiles of the day and write it into
    a copy of the netcdf file.
    
    
    Parameters
    ----------
    inputFile : str
        Path to the input file, as generated by raw2l1
    
    outputFile : str, default=None
        Path to the output file. Defaults to paths.file_defaultoutput()
    
    storeInNetcdf : bool, default=True
        If True, the field 'blh_kabl', containing the BLH estimation, is
        stored in the outputFile
    
    params : dict, default=None
        Dict with all settings. This function notably depends on
        'n_clusters', 'n_profiles', and 'predictors'
    
    
    Returns
    -------
    blh : ndarray of shape (Nt,)
        Time series of BLH as estimated by the KABL algorithm.
    
    
    Example
    -------
    >>> from kabl import paths
    >>> from kabl import core
    >>> testFile = paths.file_defaultlidardata()
    >>> blh = core.blh_estimation(testFile)
    """

    t0 = time.time()  #::::::::::::::::::::::

    if params is None:
        params = utils.get_default_params()

    # 1. Extract the data
    # ---------------------
    loc, dateofday, lat, lon = utils.where_and_when(inputFile)
    needed_data = np.unique(np.concatenate(list(params["predictors"].values())))
    t_values, z_values, rcss = utils.extract_data(
        inputFile, to_extract=needed_data, params=params
    )

    if "rcs_0" in needed_data:
        rcs_0 = rcss["rcs_0"]
    if "rcs_1" in needed_data:
        rcs_1 = rcss["rcs_1"]
    if "rcs_2" in needed_data:
        rcs_2 = rcss["rcs_2"]

    blh = []

    # setup toolbar
    toolbar_width = int(len(t_values) / 10) + 1
    sys.stdout.write(
        "\nKABL estimation ("
        + loc
        + dateofday.strftime(", %Y/%m/%d")
        + "): [%s]" % ("." * toolbar_width)
    )
    sys.stdout.flush()
    sys.stdout.write("\b" * (toolbar_width + 1))  # return to start of line, after '['

    # Loop over all profiles of the day
    for t in range(len(t_values)):
        # toolbar
        if np.mod(t, 10) == 0:
            sys.stdout.write("*")
            sys.stdout.flush()

        # 2. Prepare the data
        # ---------------------
        coords = {
            "time": dt.datetime.utcfromtimestamp(t_values[t]),
            "lat": lat,
            "lon": lon,
        }
        t_back = max(t - params["n_profiles"] + 1, 0)

        rcss = {}
        if "rcs_0" in needed_data:
            rcss["rcs_0"] = rcs_0[t_back : t + 1, :]
        if "rcs_1" in needed_data:
            rcss["rcs_1"] = rcs_1[t_back : t + 1, :]
        if "rcs_2" in needed_data:
            rcss["rcs_2"] = rcs_2[t_back : t + 1, :]

        X, Z = prepare_data(coords, z_values, rcss=rcss, params=params)

        # 3. Apply the machine learning algorithm
        # ---------------------
        if isinstance(params["n_clusters"], int):
            labels = apply_algo(X, params["n_clusters"], params=params)
        else:
            labels, n_clusters, classif_score = apply_algo_k_auto(X, params=params)

        # 4. Derive and store the BLH
        # ---------------------
        blh.append(utils.blh_from_labels(labels, Z))

    if outputFile is None:
        outputFile = paths.file_defaultoutput()

    # end toolbar
    t1 = time.time()  #::::::::::::::::::::::
    chrono = t1 - t0
    sys.stdout.write("] (" + str(np.round(chrono, 4)) + " s)\n")

    # 5. Store the new BLH estimation into a copy of the original netCDF
    # ---------------------
    if storeInNetcdf:
        utils.add_blh_to_netcdf(inputFile, outputFile, blh)

    return np.array(blh)
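
A minimal usage sketch with a customized settings dictionary. The keys 'n_clusters' and 'n_profiles' appear in the function body above; the values chosen here are illustrative only.

from kabl import core, paths, utils

params = utils.get_default_params()
params["n_clusters"] = 3   # fixed number of clusters, skips automatic selection
params["n_profiles"] = 1   # number of consecutive profiles fed to the clustering

testFile = paths.file_defaultlidardata()
blh = core.blh_estimation(testFile, storeInNetcdf=False, params=params)
print(blh.shape)           # (Nt,): one BLH estimate per profile of the day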
Example #2
def adabl_qualitymetrics(
    dataFile: str,
    modelFile: str,
    scalerFile: str,
    refFile: str = "indus",
    outputFile: str = "None",
    addResultsToNetcdf: bool = False,
):
    """Perform BLH estimation with ADABL on all profiles of the day and 
    write it into a copy of the netcdf file
    
    
    Parameters
    ----------
    dataFile : str
        Path to the input file, as generated by raw2l1
    
    modelFile : str
        Path to the model file (pickle object)
    
    scalerFile : str
        Path to the scaler file (pickle object)
    
    refFile : str
        Path to the reference BLH estimation (hand-made or manufacturer's).
        Any non-existent path falls back to the manufacturer's estimation,
        which is the default.
    
    outputFile : str
        Path to the output file. Must be specified if addResultsToNetcdf=True
    
    addResultsToNetcdf : bool, default=False
        If True, adds the quality metrics to the existing result file specified 
        in outputFile
    
    
    
    Returns
    -------
    errl2_blh : float
        Root mean squared gap between the BLH from ADABL and the reference
        .. math:: \sqrt{\frac{1}{N} \sum_{i=1}^N (Z(i)-Z_{ref}(i))^2}
    
    errl1_blh : float
        Mean absolute gap between the BLH from ADABL and the reference
        .. math:: \frac{1}{N} \sum_{i=1}^N \vert Z(i)-Z_{ref}(i) \vert
    
    errl0_blh : float
        Maximum absolute gap between the BLH from ADABL and the reference
        .. math:: \max_i \vert Z(i)-Z_{ref}(i) \vert
    
    corr_blh : float
        Correlation coefficient between the BLH from ADABL and the reference
    
    chrono : float
        Computation time for the full day (seconds)
    
    n_invalid : int
        Number of BLH estimations that are NaN or Inf
    """
    
    t0 = time.time()  #::::::::::::::::::::::

    # 1. Extract the data
    # ---------------------
    loc, dateofday, lat, lon = utils.where_and_when(dataFile)
    t_values, z_values, dat = utils.extract_data(
        dataFile, to_extract=["rcs_1", "rcs_2", "pbl"]
    )
    rcs_1 = dat["rcs_1"]
    rcs_2 = dat["rcs_2"]
    blh_mnf = dat["pbl"]
    sec_intheday = np.mod(t_values, 24 * 3600)

    Nt, Nz = rcs_1.shape

    # Load pre-trained model
    # ------------------------
    with open(modelFile, "rb") as fc:
        model = pickle.load(fc)
    with open(scalerFile, "rb") as fc:
        scaler = pickle.load(fc)

    blh = []

    # setup toolbar
    toolbar_width = int(len(t_values) / 10) + 1
    sys.stdout.write(
        "ADABL estimation ("
        + loc
        + dateofday.strftime(", %Y/%m/%d")
        + "): [%s]" % ("." * toolbar_width)
    )
    sys.stdout.flush()
    sys.stdout.write("\b" * (toolbar_width + 1))  # return to start of line, after '['

    # Loop over all profiles of the day
    for t in range(Nt):
        # toolbar
        if np.mod(t, 10) == 0:
            if any(np.isnan(blh[-10:])):  # flag NaNs among the last 10 estimations
                sys.stdout.write("!")
            else:
                sys.stdout.write("*")
            sys.stdout.flush()

        # 2. Prepare the data
        # ---------------------
        rcs1loc = rcs_1[t, :]
        rcs2loc = rcs_2[t, :]
        rcs1loc[rcs1loc <= 0] = 1e-5
        rcs2loc[rcs2loc <= 0] = 1e-5

        X_new = np.array(
            [
                np.repeat(sec_intheday[t], Nz),
                z_values,
                np.log10(rcs1loc),
                np.log10(rcs2loc),
            ]
        ).T
        X_new = scaler.transform(X_new)

        # 3. Apply the machine learning algorithm
        # ---------------------
        y_new = model.predict(X_new)

        # 4. Derive and store the BLH
        # ---------------------
        blh.append(utils.blh_from_labels(y_new, z_values))

    # end toolbar
    t1 = time.time()  #::::::::::::::::::::::
    chrono = t1 - t0
    sys.stdout.write("] (" + str(np.round(chrono, 4)) + " s)\n")

    if os.path.isfile(refFile):
        blh_ref = np.loadtxt(refFile)
    else:
        blh_ref = blh_mnf[:, 0]

    if addResultsToNetcdf:
        BLHS = [np.array(blh)]
        BLH_NAMES = ["BLH_ADABL"]

        msg = add_blhs_to_netcdf(outputFile, BLHS, BLH_NAMES)
        print(msg)

    errl2_blh = np.sqrt(np.nanmean((blh - blh_ref) ** 2))
    errl1_blh = np.nanmean(np.abs(blh - blh_ref))
    errl0_blh = np.nanmax(np.abs(blh - blh_ref))
    corr_blh = np.corrcoef(blh, blh_ref)[0, 1]
    n_invalid = np.sum(np.isnan(blh)) + np.sum(np.isinf(blh))

    return errl2_blh, errl1_blh, errl0_blh, corr_blh, chrono, n_invalid
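
The three gap metrics can be reproduced stand-alone. This sketch mirrors the NaN-aware reductions used in the function body; the name gap_metrics is illustrative.

import numpy as np

def gap_metrics(blh, blh_ref):
    blh = np.asarray(blh, dtype=float)
    blh_ref = np.asarray(blh_ref, dtype=float)
    errl2 = np.sqrt(np.nanmean((blh - blh_ref) ** 2))  # root mean squared gap
    errl1 = np.nanmean(np.abs(blh - blh_ref))          # mean absolute gap
    errl0 = np.nanmax(np.abs(blh - blh_ref))           # maximum absolute gap
    return errl2, errl1, errl0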
Example #3
def quicklook_data(nc_file, max_height=4500, with_pbl=False, with_cbh=False):
    """Give a quick look of the data, only the data.
    
    Parameters
    ----------
    nc_file : str
        Path to the netcdf file containing the data
    
    max_height : {float, int}, default=4500
        Top height on the graphic
    
    with_pbl : bool, default=False
        If True, add onto the data the boundary layer height calculated
        by the manufacturer
    
    with_cbh : bool, default=False
        If True, add onto the data the first cloud base height
        calculated by the manufacturer
    
    Returns
    -------
    None
    """

    location, day, lat, lon = utils.where_and_when(nc_file)

    to_be_extracted = ["rcs_0"]
    if with_pbl:
        to_be_extracted.append("pbl")
    if with_cbh:
        to_be_extracted.append("cloud_base_height")

    t, z, dat = utils.extract_data(nc_file,
                                   max_height=max_height,
                                   to_extract=to_be_extracted)

    rcs = dat["rcs_0"]
    if "pbl" in to_be_extracted:
        pbl = dat["pbl"]
    if "cloud_base_height" in to_be_extracted:
        cbh = dat["cloud_base_height"]

    plt.figure(figsize=(14, 7))
    plt.pcolormesh(t, z, rcs.T, alpha=0.8, cmap="rainbow", vmin=-0.1, vmax=0.8)
    if with_pbl:
        pbl[pbl == -999] = np.nan
        for layer in range(pbl.shape[1]):
            plt.plot(t, pbl[:, layer], "k*")
    if with_cbh:
        cbh[cbh == -999] = np.nan
        for layer in range(cbh.shape[1]):
            plt.plot(t, cbh[:, layer], "r.")
    axes = plt.gca()
    plt.title("Lidar backscatter | " + location + " " +
              day.strftime("%Y/%m/%d"))
    axes.set_xlabel("Hour")
    axes.set_ylabel("Height (m agl)")
    plt.tight_layout()
    plt.grid(color="white", ls="solid")
    plt.colorbar(label="Range corrected signal", alpha=0.8)

    locs, labels = plt.xticks()
    labels = [
        dt.datetime.utcfromtimestamp(loc).strftime("%H:%M") for loc in locs
    ]

    axes.set_xticks(locs)
    axes.set_xticklabels(labels)
    plt.gcf().autofmt_xdate()
    plt.show(block=False)
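
A possible call, reusing the test file from the blh_estimation example above. The module holding quicklook_data is not shown in this section, so the import below is an assumption to adapt.

from kabl import paths
from kabl import graphics  # assumed module; adapt to where quicklook_data lives

nc_file = paths.file_defaultlidardata()
graphics.quicklook_data(nc_file, max_height=3000, with_pbl=True, with_cbh=True)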
Example #4
def adabl_blh_estimation(
    dataFile: str,
    modelFile: str,
    scalerFile: str,
    outputFile: str = None,
    storeInNetcdf: bool = False,
):
    """Perform BLH estimation with ADABL on all profiles of the day and 
    write it into a copy of the netcdf file
    
    
    Parameters
    ----------
    dataFile : str
        Path to the input file, as generated by raw2l1
    
    modelFile : str
        Path to the model file (pickle object)
    
    scalerFile : str
        Path to the scaler file (pickle object)
    
    outputFile : str, default=None
        Path to the output file. Default adds ".out" before ".nc"
    
    storeInNetcdf : bool, default=False
        If True, the field 'blh_adabl', containing the BLH estimation, is
        stored in the outputFile
    
    
    Returns
    -------
    blh : ndarray of shape (Nt,)
        Time series of BLH as estimated by the ADABL algorithm.
    """

    t0 = time.time()  #::::::::::::::::::::::

    # 1. Extract the data
    # ---------------------
    loc, dateofday, lat, lon = utils.where_and_when(dataFile)
    t_values, z_values, dat = utils.extract_data(
        dataFile, to_extract=["rcs_1", "rcs_2", "pbl"]
    )
    rcs_1 = dat["rcs_1"]
    rcs_2 = dat["rcs_2"]
    blh_mnf = dat["pbl"]
    sec_intheday = np.mod(t_values, 24 * 3600)

    Nt, Nz = rcs_1.shape

    # Load pre-trained model
    # ------------------------
    with open(modelFile, "rb") as fc:
        model = pickle.load(fc)
    with open(scalerFile, "rb") as fc:
        scaler = pickle.load(fc)

    blh = []

    # setup toolbar
    toolbar_width = int(len(t_values) / 10) + 1
    sys.stdout.write(
        "ADABL estimation ("
        + loc
        + dateofday.strftime(", %Y/%m/%d")
        + "): [%s]" % ("." * toolbar_width)
    )
    sys.stdout.flush()
    sys.stdout.write("\b" * (toolbar_width + 1))  # return to start of line, after '['

    # Loop over all profiles of the day
    for t in range(Nt):
        # toolbar
        if np.mod(t, 10) == 0:
            if any(np.isnan(blh[-10:])):  # flag NaNs among the last 10 estimations
                sys.stdout.write("!")
            else:
                sys.stdout.write("*")
            sys.stdout.flush()

        # 2. Prepare the data
        # ---------------------
        rcs1loc = rcs_1[t, :]
        rcs2loc = rcs_2[t, :]
        rcs1loc[rcs1loc <= 0] = 1e-5
        rcs2loc[rcs2loc <= 0] = 1e-5

        X_new = np.array(
            [
                np.repeat(sec_intheday[t], Nz),
                z_values,
                np.log10(rcs1loc),
                np.log10(rcs2loc),
            ]
        ).T
        X_new = scaler.transform(X_new)

        # 3. Apply the machine learning algorithm
        # ---------------------
        y_new = model.predict(X_new)

        # 4. Derive and store the BLH
        # ---------------------
        blh.append(utils.blh_from_labels(y_new, z_values))

    # end toolbar
    t1 = time.time()  #::::::::::::::::::::::
    chrono = t1 - t0
    sys.stdout.write("] (" + str(np.round(chrono, 4)) + " s)\n")

    if outputFile is None:
        outputFile = dataFile[:-3] + ".out.nc"

    # 5. Store the new BLH estimation into a copy of the original netCDF
    if storeInNetcdf:
        utils.add_blh_to_netcdf(dataFile, outputFile, blh, origin="adabl")

    return np.array(blh)
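
A usage sketch with placeholder paths: the pickled model and scaler are not provided in this section, so every path below is hypothetical.

blh = adabl_blh_estimation(
    dataFile="data/lidar_20200101.nc",      # hypothetical raw2l1 output
    modelFile="models/adabl_model.pkl",     # hypothetical pickled classifier
    scalerFile="models/adabl_scaler.pkl",   # hypothetical pickled scaler
    storeInNetcdf=False,
)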