Beispiel #1
0
class VineModel(BaseEstimator):
    """!
    @brief  sklearn-style estimator wrapper around a C-vine copula.

    fit() builds and constructs a Cvine from the training data and
    predict(n) draws n samples from the fitted vine.
    """
    def __init__(self,
                 vine_type='c',
                 trial_copula=None,
                 copula_fit_method='mle',
                 vine_fit_method='ktau',
                 rank_transform=True):
        """!
        @brief Configure the vine model.
        @param vine_type str. Only 'c' is accepted; any other value raises
            NotImplementedError.
        @param trial_copula dict or None. Forwarded to Cvine as its
            ``trial_copula`` argument when fit() is called. None (the
            default) is replaced by a fresh empty dict.
        @param copula_fit_method str. Accepted for interface compatibility;
            not used by this class.
        @param vine_fit_method str. Accepted for interface compatibility;
            not used by this class.
        @param rank_transform bool. If true, fit() rank-transforms the data
            before building the vine.
        """
        # Build a new dict per instance: a `{}` default in the signature
        # would be one object shared by every VineModel.
        self.trial_copula_dict = {} if trial_copula is None else trial_copula
        self._rank_transform = rank_transform
        if vine_type == 'c':
            # NOTE(review): this placeholder lives on `_vine`, while fit()
            # and predict() use `vine`; confirm whether both are needed.
            self._vine = Cvine([])
        else:
            raise NotImplementedError

    def fit(self, X, weights=None):
        """!
        @brief Fit vine copula model to data.
        @param X correlated vectors with shape (Nsamples, Ndim). Can be
            unranked or ranked data. Anything that is not already a
            pandas DataFrame is wrapped in one.
        @param weights Accepted for interface compatibility; not used.
        @return self, so that calls can be chained.
        """
        if not isinstance(X, pd.DataFrame):
            tstData = pd.DataFrame(X)
        else:
            tstData = X
        if self.rank_transform:
            # Scale the ranks by the number of rows that were actually
            # ranked; using the pre-dropna length would compress the
            # scaled ranks whenever rows containing NaN are dropped.
            complete_rows = tstData.dropna()
            x_r = complete_rows.rank() / (len(complete_rows) + 1)
        else:
            x_r = tstData
        self.vine = Cvine(x_r, trial_copula=self.trial_copula_dict)
        self.vine.constructVine()
        return self

    def predict(self, n):
        """!
        @brief Draw samples from the fitted vine.
        @param n int. Number of samples to draw.
        @return Whatever ``Cvine.sample(n)`` returns. fit() must have been
            called first, otherwise ``self.vine`` does not exist.
        """
        return self.vine.sample(n)

    @property
    def rank_transform(self):
        """Whether fit() rank-transforms its input before building the vine."""
        return self._rank_transform

    @rank_transform.setter
    def rank_transform(self, rt):
        # Coerce to a real bool so truthy/falsy inputs are normalised.
        self._rank_transform = bool(rt)
Beispiel #2
0
 def fit(self, X, weights=None):
     """!
     @brief Fit vine copula model to data.
     @param X correlated vectors with shape (Nsamples, Ndim). Can be
         unranked or ranked data. Anything that is not already a
         pandas DataFrame is wrapped in one.
     @param weights Accepted for interface compatibility; not used.
     @return self, so that calls can be chained.
     """
     if not isinstance(X, pd.DataFrame):
         tstData = pd.DataFrame(X)
     else:
         tstData = X
     if self.rank_transform:
         # Scale the ranks by the number of rows that were actually
         # ranked; using the pre-dropna length would compress the
         # scaled ranks whenever rows containing NaN are dropped.
         complete_rows = tstData.dropna()
         x_r = complete_rows.rank() / (len(complete_rows) + 1)
     else:
         x_r = tstData
     self.vine = Cvine(x_r, trial_copula=self.trial_copula_dict)
     self.vine.constructVine()
     return self
    def testCvineConstruct(self):
        """Build, plot and sample a C-vine from five columns of stocks.csv.

        Each stage writes a figure to disk: the raw data, the ranked data,
        the vine graph and 8000 samples drawn from the constructed vine.
        """
        raw = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

        # Create pandas data table: column label -> column index in the CSV
        tstData = pd.DataFrame()
        for label, col_idx in (('1a', 0), ('2b', 1), ('3c', 4),
                               ('4d', 5), ('5e', 6)):
            tstData[label] = raw[:, col_idx]

        # Visualize multivar data
        matrixPairPlot(tstData, savefig="quad_varaite_ex.png")

        # Visualize multivar ranked data
        n_rows = len(tstData)
        ranked = tstData.dropna().rank() / (n_rows + 1)
        matrixPairPlot(ranked, savefig="quad_varaite_ranked_ex.png")

        # Init the vine, construct it and plot its graph
        vine = Cvine(ranked)
        vine.constructVine()
        vine.plotVine(savefig="c_vine_graph_ex.png")

        # Sample from the vine and plot the draws
        drawn = vine.sample(n=8000)
        matrixPairPlot(drawn, savefig="quad_varaite_resampled_ex.png")
Beispiel #4
0
 def __init__(self,
              vine_type='c',
              trial_copula=None,
              copula_fit_method='mle',
              vine_fit_method='ktau',
              rank_transform=True):
     """!
     @brief Configure the vine model.
     @param vine_type str. Only 'c' is accepted; any other value raises
         NotImplementedError.
     @param trial_copula dict or None. Stored as ``trial_copula_dict``.
         None (the default) is replaced by a fresh empty dict.
     @param copula_fit_method str. Accepted but not used here.
     @param vine_fit_method str. Accepted but not used here.
     @param rank_transform bool. Stored as ``_rank_transform``.
     """
     # Build a new dict per instance: a `{}` default in the signature
     # would be one object shared by every instance.
     self.trial_copula_dict = {} if trial_copula is None else trial_copula
     self._rank_transform = rank_transform
     if vine_type == 'c':
         self._vine = Cvine([])
     else:
         raise NotImplementedError
Beispiel #5
0
def _read_valid(node, columns):
    """Return the non-NaN entries of the selected columns of a data node.

    The node is read a single time; the same array is used both for the
    values and for the NaN mask.

    @param node object whose ``read()`` returns an array indexable as
        ``[:, columns]``.
    @param columns column selector (an int or a sequence of ints).
    @return 1-D numpy array of the selected entries that are not NaN.
    """
    values = node.read()[:, columns]
    return values[~np.isnan(values)]


def _invert_marginal_cdf(ux, X, marginalCDFModel,
                         bisect_xtol=1e-3, bisect_maxiter=15):
    """Numerically invert a marginal CDF at every value in X.

    For each target ``xx`` in X the root ``m`` of
    ``xx - marginalCDFModel(-np.inf, m)`` is bracketed with bisection and
    then refined with Newton's (secant) method.

    @param ux original 1-D data; its min and max, each widened by 50 % of
        its magnitude, form the bisection bracket.
    @param X iterable of target CDF values.
    @param marginalCDFModel callable ``(low, high)`` returning the
        probability mass between low and high.
    @param bisect_xtol absolute tolerance of the bisection stage.
    @param bisect_maxiter iteration cap of the bisection stage.
    @return float array with one entry per element of X; NaN where the
        root search failed.
    """
    icdf = np.full(np.array(X).size, np.nan)
    if np.size(ux) == 0:
        # No data to bracket with: every inversion fails.
        return icdf
    # The bracket depends on ux only, so compute it once, not per sample.
    ux_min, ux_max = min(ux), max(ux)
    bracket_lo = ux_min - np.abs(0.5 * ux_min)
    bracket_hi = ux_max + np.abs(0.5 * ux_max)
    for i, xx in enumerate(X):
        def cdf_err(m):
            return xx - marginalCDFModel(-np.inf, m)
        try:
            start = bisect(cdf_err, bracket_lo, bracket_hi,
                           xtol=bisect_xtol, maxiter=bisect_maxiter)
            icdf[i] = newton(cdf_err, start, tol=1e-6, maxiter=20)
        except (ValueError, RuntimeError, ArithmeticError):
            # Root not bracketed or solver did not converge: leave this
            # sample as NaN. Unlike a bare `except`, KeyboardInterrupt and
            # programming errors still propagate.
            icdf[i] = np.nan
    return icdf


def _plot_span_mvd(temps, tkes, bhfs, weights, savefig):
    """Plot temperature, TKE and heat-flux residuals as a weighted Mvd."""
    span_mvd = mvd.Mvd()
    span_mvd.setData(
        {
            "Residual Temperature [K]": temps,
            "Residual TKE [J/kg]": tkes,
            "Residual BHF [W/m^2]": bhfs,
        },
        weights)
    span_mvd.plot(savefig=savefig, kde=False)


def _plot_resampled_t_tke(temps, tkes, t_hat, tke_hat, savefig,
                          bisect_xtol, bisect_maxiter):
    """Map vine samples through the KDE marginals and plot T against TKE.

    Each marginal CDF is the ``integrate_box`` of a gaussian_kde fitted to
    the original data; the result is plotted against the original data.
    """
    kde_cdf = gaussian_kde(temps).integrate_box
    resampled_t = _invert_marginal_cdf(temps, t_hat, kde_cdf,
                                       bisect_xtol, bisect_maxiter)
    kde_cdf = gaussian_kde(tkes).integrate_box
    resampled_tke = _invert_marginal_cdf(tkes, tke_hat, kde_cdf,
                                         bisect_xtol, bisect_maxiter)
    bvc.bvJointPlot(resampled_t,
                    resampled_tke,
                    vs=[temps, tkes],
                    savefig=savefig)


def main():
    """Fit C-vines to an upper and a lower span of binned CFD data.

    For each span the temperature, TKE and boundary-heat-flux residuals
    are read from the h5 file, plotted, used to construct a Cvine, sampled
    500 times, and the T/TKE samples are mapped back through KDE marginals
    and plotted against the original data. The h5 file is closed even if
    a step fails.
    """
    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        # NOTE(review): the bounds, crud thickness and boron density nodes
        # are fetched but not consumed below; the calls are kept so that
        # h5Load's behaviour on those nodes is unchanged.
        bounds = h5Load(store, "Water/UO2 [Interface 1]/Temperature_bounds")
        temperature = h5Load(store, "Water/UO2 [Interface 1]/Temperature")
        tke = h5Load(store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy")
        crud_thick = h5Load(store, "Water/UO2 [Interface 1]/CrudThickness")
        b10 = h5Load(store, "Water/UO2 [Interface 1]/CrudBoronDensity")
        weight = h5Load(store, "Water/UO2 [Interface 1]/Temperature_weights")
        bhf = h5Load(store, "Water/UO2 [Interface 1]/BoundaryHeatFlux")

        # ---------------------------- UPPER SPAN ----------------------------
        zones = range(72, 74)
        temps = _read_valid(temperature, zones)
        tkes = _read_valid(tke, zones)
        bhfs = _read_valid(bhf, zones)
        weights = _read_valid(weight, zones)
        _plot_span_mvd(temps, tkes, bhfs, weights, savefig="upper_span.png")

        # init Cvine
        print("================= Construct Upper Vine =================")
        upperData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        upperVine = Cvine(pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs}))
        upperVine.constructVine()
        upperVine.plotVine(savefig="upper_vine.png")
        print("========================================================")
        upperVineSamples = upperVine.sample(n=500)
        plt.figure(22)
        matrixPairPlot(upperVineSamples, savefig="upper_vine_samples.png")
        upper_ranked_data = upperData.dropna().rank() / (len(upperData) + 1)
        matrixPairPlot(upper_ranked_data, savefig="upper_ranked_samples.png")

        # Resample original data
        _plot_resampled_t_tke(temps, tkes,
                              upperVineSamples['t'], upperVineSamples['tke'],
                              savefig="upper_t_tke_resampled.png",
                              bisect_xtol=1e-3, bisect_maxiter=15)

        # ---------------------------- LOWER SPAN ----------------------------
        zones = range(70, 71)
        temps = _read_valid(temperature, zones)
        tkes = _read_valid(tke, zones)
        bhfs = _read_valid(bhf, zones)
        weights = _read_valid(weight, zones)
        _plot_span_mvd(temps, tkes, bhfs, weights, savefig="lower_span.png")

        # init Cvine; the vine input lists "tke" first, unlike lowerData
        print("================= Construct Lower Vine =================")
        lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
        lowerVine = Cvine(pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
        lowerVine.constructVine()
        plt.figure(20)
        lowerVine.plotVine(savefig="lower_vine.png")
        print("========================================================")
        lowerVineSamples = lowerVine.sample(n=500)
        matrixPairPlot(lowerVineSamples, savefig="lower_vine_samples.png")
        lower_ranked_data = lowerData.dropna().rank() / (len(lowerData) + 1)
        matrixPairPlot(lower_ranked_data, savefig="lower_ranked_samples.png")

        # Resample original data (coarser bisection stage than upper span)
        _plot_resampled_t_tke(temps, tkes,
                              lowerVineSamples['t'], lowerVineSamples['tke'],
                              savefig="lower_t_tke_resampled.png",
                              bisect_xtol=1e-2, bisect_maxiter=10)
    finally:
        # Clean up: release the h5 file even when a step above raised.
        store.close()
    def testCvineConstruct(self):
        """Check that a fitted C-vine reproduces the data's correlations.

        A Cvine is constructed from five ranked columns of stocks.csv and
        sampled 8000 times. The Pearson and Kendall correlation matrices
        of the samples must match those of the ranked data to within 0.10,
        and so must the Pearson matrices of samples rescaled through
        beta marginals fitted to the raw columns.
        """
        raw = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')

        # Create pandas data table: column label -> column index in the CSV
        tstData = pd.DataFrame()
        for label, col_idx in (('1a', 0), ('2b', 1), ('3c', 4),
                               ('4d', 5), ('5e', 6)):
            tstData[label] = raw[:, col_idx]

        # Visualize multivar data, raw and ranked
        matrixPairPlot(tstData, savefig="quad_varaite_ex.png")
        ranked_data = tstData.dropna().rank() / (len(tstData) + 1)
        matrixPairPlot(ranked_data, savefig="quad_varaite_ranked_ex.png")

        # Init, construct and plot the vine
        tstVine = Cvine(ranked_data)
        tstVine.constructVine()
        tstVine.plotVine(savefig="c_vine_graph_ex.png")

        # sample from vine
        c_vine_samples = tstVine.sample(n=8000)
        matrixPairPlot(c_vine_samples, savefig="vine_resampled_ex.png")

        def column_sorted_corr(frame, method):
            """Correlation matrix of *frame* with columns sorted by label."""
            corr = frame.corr(method=method)
            return corr.reindex(sorted(corr.columns), axis=1)

        # Correlations of the original ranked data and of the vine samples
        tst_rho_matrix = column_sorted_corr(ranked_data, 'pearson')
        tst_ktau_matrix = column_sorted_corr(ranked_data, 'kendall')
        sample_rho_matrix = column_sorted_corr(c_vine_samples, 'pearson')
        sample_ktau_matrix = column_sorted_corr(c_vine_samples, 'kendall')

        rho_diff = tst_rho_matrix - sample_rho_matrix
        for line in ("Original data corr matrix:", tst_rho_matrix,
                     "Vine sample corr matrix:", sample_rho_matrix,
                     "Diff:", rho_diff):
            print(line)
        self.assertTrue(np.allclose(rho_diff, 0, atol=0.10))
        self.assertTrue(
            np.allclose(tst_ktau_matrix - sample_ktau_matrix, 0, atol=0.10))

        # fit marginal distributions to original data
        marginal_dict = {
            col_name: beta(*beta.fit(tstData[col_name]))
            for col_name in tstData.columns
        }

        # scale the existing samples, then draw-and-scale a second set
        scaled_a = tstVine.scaleSamples(c_vine_samples, marginal_dict)
        matrixPairPlot(scaled_a,
                       savefig="vine_varaite_resampled_scaled_a.png")
        scaled_b = tstVine.sampleScale(8000, marginal_dict)

        # compute correlation coeffs
        scaled_rho_a = scaled_a.corr(method='pearson')
        scaled_rho_b = scaled_b.corr(method='pearson')

        # check for consistency
        for scaled_rho in (scaled_rho_a, scaled_rho_b):
            self.assertTrue(
                np.allclose(tst_rho_matrix - scaled_rho, 0, atol=0.1))
Beispiel #7
0
def _read_valid(node, columns):
    """Return the non-NaN entries of the selected columns of a data node.

    The node is read a single time; the same array is used both for the
    values and for the NaN mask.

    @param node object whose ``read()`` returns an array indexable as
        ``[:, columns]``.
    @param columns column selector (an int or a sequence of ints).
    @return 1-D numpy array of the selected entries that are not NaN.
    """
    values = node.read()[:, columns]
    return values[~np.isnan(values)]


def main():
    """Fit and sample a C-vine for every zone from 65 to 97.

    For each zone the temperature, TKE and boundary-heat-flux residuals
    are read from the h5 file, a Cvine is constructed and sampled 500
    times, the samples are plotted under ``singlePinPlots/``, and the
    T/TKE samples are mapped back through KDE marginals. The h5 file is
    closed even if a step fails.
    """
    # read data from external h5 file
    h5file = 'Cicada_cfd_180x_cht.h5.post.binned.h5'
    store = pt.open_file(h5file)
    try:
        # NOTE(review): the crud thickness and boron density nodes are
        # fetched but not consumed below; the calls are kept so that
        # h5Load's behaviour on those nodes is unchanged.
        bounds = h5Load(store, "Water/UO2 [Interface 1]/Temperature_bounds")
        temperature = h5Load(store, "Water/UO2 [Interface 1]/Temperature")
        tke = h5Load(store, "Water/UO2 [Interface 1]/TurbulentKineticEnergy")
        crud_thick = h5Load(store, "Water/UO2 [Interface 1]/CrudThickness")
        b10 = h5Load(store, "Water/UO2 [Interface 1]/CrudBoronDensity")
        weight = h5Load(store, "Water/UO2 [Interface 1]/Temperature_weights")
        bhf = h5Load(store, "Water/UO2 [Interface 1]/BoundaryHeatFlux")

        # SPAN
        zones = range(65, 98)
        for zone in zones:
            zBounds = _read_valid(bounds, zone)
            temps = _read_valid(temperature, zone)
            tkes = _read_valid(tke, zone)
            bhfs = _read_valid(bhf, zone)
            weights = _read_valid(weight, zone)
            span_1_dataDict = {
                "Residual Temperature [K]": temps,
                "Residual TKE [J/kg]": tkes,
                "Residual BHF [W/m^2]": bhfs,
            }
            span_1_mvd = mvd.Mvd()
            span_1_mvd.setData(span_1_dataDict, weights)
            # Requires exactly two non-NaN bounds for the zone, otherwise
            # the unpacking raises ValueError.
            # NOTE(review): confirm that the first entry really is the
            # upper bound; the output file names depend on this order.
            upper_z, lower_z = zBounds
            bounds_label = str(lower_z) + "_" + str(upper_z)
            # span_1_mvd.plot(savefig=bounds_label + "_span.png", kde=False)

            # Construct Cvine; the vine input lists "tke" first, unlike
            # lowerData
            lowerData = pd.DataFrame({"t": temps, "tke": tkes, "q": bhfs})
            lowerVine = Cvine(
                pd.DataFrame({"tke": tkes, "t": temps, "q": bhfs}))
            lowerVine.constructVine()

            # Sample Cvine
            lowerVineSamples = lowerVine.sample(n=500)
            matrixPairPlot(lowerVineSamples,
                           savefig="singlePinPlots/" + bounds_label +
                           "_vine_samples.png")
            ranked_data = lowerData.dropna().rank() / (len(lowerData) + 1)
            # matrixPairPlot(ranked_data, savefig="singlePinPlots/" + bounds_label + "_ranked_samples.png")
            t_hat_vine, tke_hat_vine, q_hat_vine = lowerVineSamples[
                't'], lowerVineSamples['tke'], lowerVineSamples['q']

            # Map the vine samples back through the KDE marginals
            kde_cdf = gaussian_kde(temps).integrate_box
            resampled_t = icdf_uv_bisect(temps, t_hat_vine, kde_cdf)
            kde_cdf = gaussian_kde(tkes).integrate_box
            resampled_tke = icdf_uv_bisect(tkes, tke_hat_vine, kde_cdf)
            # bvc.bvJointPlot(resampled_t, resampled_tke, vs=[temps, tkes],
            #                 savefig="singlePinPlots/" + bounds_label + "_t_tke_resampled.png")

            # Grow crud at resampled points
            #crudModel = Mamba1d(len(resampled_t))

            # Compare resampled crud to original crud result
    finally:
        # Clean up: release the h5 file even when a step above raised.
        store.close()