Example #1
0
    def test_gaussian_copula(self):
        """Draw a rank histogram of one Gaussian copula against another.

        Two bivariate normal samples with the same mean and the same
        correlation (0.5) are generated and a ``GaussianCopula`` is built from
        each.  ``n`` values obtained from the second copula's ``generates_U``
        are handed to ``diag(2).rank_histogram`` together with the first
        copula.  The test contains no assertion.
        """
        n = 10000  # number of values requested from generates_U
        dimkeys = ["solar", "wind"]
        ourmean = [2, 3]
        rho = 0.5
        rho2 = 0.5
        ourcov = [[1, rho], [rho, 1]]
        ourcov2 = [[1, rho2], [rho2, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}

        # Draw order is kept (first sample, then second) so a seeded run
        # produces the same data as before.
        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)

        # Column i of each sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}
        data_dict2 = {key: data_array2[:, i] for i, key in enumerate(dimkeys)}

        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)
        multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)

        rank_data = multigaussian2.generates_U(n)

        diag(2).rank_histogram(rank_data, 20, multigaussian1)
Example #2
0
    def test_gaussian_copula(self):
        """Print ``emd_sort`` values for two bivariate normal samples.

        The samples share a mean but differ in correlation (0.1 vs 0.9).  The
        test is unfinished: it prints a warning, builds two ``GaussianCopula``
        objects that are not used afterwards, prints three ``emd_sort``
        results and asserts nothing.
        """
        # not finished yet
        print("Warning test not finished yet")
        dimkeys = ["solar", "wind"]
        ourmean = [2, 3]
        rho = 0.1
        rho2 = 0.9
        ourcov = [[1, rho], [rho, 1]]
        ourcov2 = [[1, rho2], [rho2, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}

        # Draw order is kept so a seeded run produces the same data as before.
        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)

        # Column i of each sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}
        data_dict2 = {key: data_array2[:, i] for i, key in enumerate(dimkeys)}

        # Not read below; the constructions are kept so the constructor is
        # still exercised until the test is completed.
        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)
        multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)

        print(emd_sort(data_array, data_array))
        print(emd_sort(data_array2, data_array))
        print(emd_sort(data_array2, data_array2))
Example #3
0
    def test_weighted_combined_copula3d(self):
        """Check a 3-D ``WeightedCombinedCopula`` against its pure components.

        A weighted combination made only of Gaussian (resp. Student) copulas
        must give the same copula log-likelihood as the plain Gaussian (resp.
        Student) copula to 7 places, and the mixed Student/Gaussian
        combination must lie strictly between the two on normal data.
        """
        dimkeys = ["solar", "wind", "tide"]

        ourmean = [0, 0, 0]
        ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]
        # One normal marginal per key, using the diagonal of the covariance.
        marginals = {
            key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
            for idx, key in enumerate(dimkeys)
        }
        sample = np.random.multivariate_normal(ourmean, ourcov, 10000)
        data_dict = {key: sample[:, idx] for idx, key in enumerate(dimkeys)}

        weights = [0.12, 0.88]
        mixed_names = ['student-copula', 'gaussian-copula']
        gaussian_names = ['gaussian-copula', 'gaussian-copula']
        student_names = ['student-copula', 'student-copula']

        mixed = WeightedCombinedCopula(dimkeys, data_dict, marginals, mixed_names, weights)
        gaussian = GaussianCopula(dimkeys, data_dict, marginals)
        all_gaussian = WeightedCombinedCopula(dimkeys, data_dict, marginals, gaussian_names, weights)
        all_student = WeightedCombinedCopula(dimkeys, data_dict, marginals, student_names, weights)
        student = StudentCopula(dimkeys, data_dict, marginals)

        gaussian_ll = gaussian.c_log_likelihood()
        student_ll = student.c_log_likelihood()
        mixed_ll = mixed.c_log_likelihood()

        self.assertAlmostEqual(all_gaussian.c_log_likelihood(), gaussian_ll, 7)
        self.assertAlmostEqual(all_student.c_log_likelihood(), student_ll, 7)
        self.assertGreater(gaussian_ll, mixed_ll)
        self.assertGreater(mixed_ll, student_ll)
Example #4
0
    def test_with_gaussian_copula_3_dim(self):
        """Compare ``rect_prob`` of a 3-D Gaussian copula with a multivariate normal.

        Both models are built from the same 1000-point normal sample and must
        agree on the probability of the box [-1, 1]^3 to 2 decimal places.
        """
        dimkeys = ["solar", "wind", "tide"]

        ourmean = [0, 0, 0]
        ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
                     "tide": UnivariateNormalDistribution(var=ourcov[2][2], mean=ourmean[2])}
        lowerdict = {"solar": -1, "wind": -1, "tide": -1}
        upperdict = {"solar": 1, "wind": 1, "tide": 1}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 1000)
        # Column i of the sample belongs to dimkeys[i].  The previous loop also
        # constructed a copula with an undefined `pair_copulae_strings`
        # argument on every iteration, which raised NameError before any
        # assertion could run.
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.1)
        multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)

        # Evaluate each probability once.  Agreement to 2 places implies
        # agreement to 1 place, so a single assertion is enough.
        copula_prob = multigaussian1.rect_prob(lowerdict, upperdict)
        normal_prob = multigaussian2.rect_prob(lowerdict, upperdict)
        self.assertAlmostEqual(copula_prob, normal_prob, 2)
Example #5
0
    def predict(self, data, num_samples=500):
        """Fill the non-training entries of ``data.y`` by conditional copula sampling.

        The entries selected by ``data.train_mask`` are mapped through
        ``self.cdf`` and used as conditioning values of a ``GaussianCopula``
        built from ``self.get_cov(data)``.  The copula's conditional samples
        for the remaining entries are mapped back through ``self.icdf``.

        Args:
            data: object exposing ``train_mask`` and ``y``; it is also passed
                to ``self.get_cov`` and ``self.forward``.
            num_samples: the single entry of the ``sample_shape`` list given to
                ``copula.conditional_sample``.

        Returns:
            A clone of ``data.y`` in which the entries outside
            ``data.train_mask`` are replaced by the result of ``self.icdf``.
        """
        observed = data.train_mask
        # XOR against an all-True mask selects everything outside the training set.
        all_true = torch.ones_like(data.train_mask).to(dtype=torch.bool)
        held_out = torch.logical_xor(all_true, data.train_mask)

        cov = self.get_cov(data)
        logits = self.forward(data)
        copula = GaussianCopula(cov)

        # Marginals for the observed block and for the held-out block.
        observed_cov = cov[observed, :][:, observed]
        observed_marginal = self.marginal(logits[observed], observed_cov)
        held_out_cov = cov[held_out, :][:, held_out]
        held_out_marginal = self.marginal(logits[held_out], held_out_cov)

        # Keep the uniforms strictly inside (0, 1).
        lo, hi = self.eps, 1 - self.eps
        observed_u = torch.clamp(self.cdf(observed_marginal, data.y[observed]), lo, hi)

        observed_idx = torch.where(observed)[0]
        held_out_idx = torch.where(held_out)[0]
        sampled_u = copula.conditional_sample(
            cond_val=observed_u,
            sample_shape=[num_samples],
            cond_idx=observed_idx,
            sample_idx=held_out_idx,
        )
        sampled_u = torch.clamp(sampled_u, self.eps, 1 - self.eps)
        sampled_y = self.icdf(held_out_marginal, sampled_u)

        result = data.y.clone()
        result[held_out] = sampled_y
        return result
Example #6
0
 def test_with_gaussian_copula_1_dim(self):
     """Compare ``rect_prob`` of a 1-D Gaussian copula with a normal distribution.

     Both models are built from the same 10000-point sample (mean 0,
     variance 2) and must agree on the probability of [-2, 1] to 3 places.
     """
     mean, variance = 0, 2
     keys = ["solar"]
     lower = {"solar": -2}
     upper = {"solar": 1}

     sample = np.random.multivariate_normal([mean], [[variance]], 10000)
     solar_values = sample[:, 0]
     data = {"solar": solar_values}
     # The marginal is estimated from the data, not from the true parameters.
     marginals = {"solar": UnivariateNormalDistribution(input_data=solar_values)}

     copula_model = GaussianCopula(input_data=data, dimkeys=keys, marginals=marginals)
     normal_model = MultiNormalDistribution(keys, input_data=data)

     copula_prob = copula_model.rect_prob(lower, upper)
     normal_prob = normal_model.rect_prob(lower, upper)
     self.assertAlmostEqual(copula_prob, normal_prob, 3)
Example #7
0
 def _copula(self, hidden):
     """Build a zero-location ``GaussianCopula`` from ``hidden``.

     ``self.hidden_to_tril`` turns ``hidden`` into a matrix parameter, which
     is passed to ``GaussianCopula`` under a keyword chosen by
     ``self.decomp``: ``scale_tril`` for ``"ldl"`` and ``covariance_matrix``
     for ``"cho"``.

     Raises:
         NotImplementedError: if ``self.decomp`` is neither ``"ldl"`` nor
             ``"cho"``.
     """
     scale = self.hidden_to_tril(hidden, decomp=self.decomp)
     # Location is all zeros, created with the dtype and device of `scale`.
     loc_shape = (scale.size(0), self.code_size)
     loc = torch.zeros(loc_shape, dtype=scale.dtype, device=scale.device)

     # NOTE(review): confirm that "ldl" output is meant as a scale_tril and
     # "cho" output as a full covariance matrix.
     if self.decomp == "ldl":
         return GaussianCopula(loc, scale_tril=scale)
     if self.decomp == "cho":
         return GaussianCopula(loc, covariance_matrix=scale)
     raise NotImplementedError
Example #8
0
    def nll(self, data):
        """Return the per-label negative log-likelihood on the training entries.

        The result is the negated copula log-probability of the CDF-transformed
        labels plus the negated sum of marginal log-probabilities, divided by
        ``labels.size(0)``.  Covariance, logits and labels are all restricted
        to ``data.train_mask``.
        """
        full_cov = self.get_cov(data)
        train = data.train_mask
        # Keep only the rows and columns of the training entries.
        cov = full_cov[train, :][:, train]
        logits = self.forward(data)[train]
        labels = data.y[train]

        copula = GaussianCopula(cov)
        marginal = self.marginal(logits, cov)

        uniforms = self.cdf(marginal, labels)
        copula_part = -copula.log_prob(uniforms)
        marginal_part = -torch.sum(marginal.log_prob(labels))
        total = copula_part + marginal_part
        return total / labels.size(0)
Example #9
0
    def forward(self, hidden, scale_tril):
        """Return a ``GaussianCopula`` with zero location and the given ``scale_tril``.

        Work in progress (see the TODO below): ``mu`` and ``lv`` are computed
        from ``hidden`` but do not influence the returned copula.

        Args:
            hidden: input whose ``size(0)`` is taken as the batch size and
                which is also concatenated along dim 2.
                NOTE(review): ``hidden.size(0)`` treats this as a tensor while
                ``torch.cat(hidden, dim=2)`` treats it as a sequence of
                tensors - confirm the intended type.
            scale_tril: passed unchanged as the second positional argument of
                ``GaussianCopula``.

        Returns:
            The constructed ``GaussianCopula``.
        """
        # TODO: need to parameterize cholesky factor for gaussian copula
        batch_size = hidden.size(0)

        h = torch.cat(hidden, dim=2).squeeze(0)
        # NOTE(review): mu and lv are never read afterwards; the calls are kept
        # as-is because self.bnmu / self.bnlv may have side effects
        # (presumably batch-norm layers - verify).
        mu = self.bnmu(self.fcmu(h))
        lv = self.bnlv(self.fclv(h))

        # NOTE(review): loc uses torch's default dtype and device, not those of
        # scale_tril - confirm this is intended when running off-CPU.
        loc = torch.zeros(batch_size, self.code_size)
        copula = GaussianCopula(loc, scale_tril)
        return copula
Example #10
0
    def test_quick_dim_2(self):
        """Check that C-vine, D-vine and Gaussian copulas agree in 2-D.

        All three are built from the same 10000-point bivariate normal sample
        and their ``C`` values at one point must match pairwise to 1 decimal
        place.
        """
        dimkeys = ["solar", "wind"]

        ourmean = [1, 0.5]
        ourcov = [[1, 0.3], [0.3, 2]]
        # One normal marginal per key, using the diagonal of the covariance.
        marginals = {
            key: UnivariateNormalDistribution(var=ourcov[idx][idx], mean=ourmean[idx])
            for idx, key in enumerate(dimkeys)
        }
        sample = np.random.multivariate_normal(ourmean, ourcov, 10000)
        data_dict = {key: sample[:, idx] for idx, key in enumerate(dimkeys)}

        pair_copulae_strings = [
            [None, 'student-copula'],
            [None, None],
        ]
        point = {"solar": 0.96, "wind": 0.87}

        c_vine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        d_vine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
        gaussian = GaussianCopula(dimkeys, data_dict, marginals)

        # The result is discarded; the call only has to complete.
        gaussian.c(point)

        self.assertAlmostEqual(c_vine.C(point), d_vine.C(point), 1)
        self.assertAlmostEqual(gaussian.C(point), d_vine.C(point), 1)
        self.assertAlmostEqual(c_vine.C(point), gaussian.C(point), 1)
Example #11
0
    def test_with_gaussian_copula_2_dim(self):
        """Compare ``rect_prob`` of a 2-D Gaussian copula with a multivariate normal.

        Both models are built from the same 100000-point normal sample
        (mean [3, 4], correlation 0.5) and must agree on the probability of
        the box [2, 4] x [3, 5] to 3 decimal places.
        """
        dimkeys = ["solar", "wind"]
        ourmean = [3, 4]
        ourcov = [[1, 0.5], [0.5, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}
        lowerdict = {"solar": 2, "wind": 3}
        upperdict = {"solar": 4, "wind": 5}

        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        # Column i of the sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}

        multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys, marginals=marginals, quadstep=0.001)
        multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)
        self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                               multigaussian2.rect_prob(lowerdict, upperdict), 3)
Example #12
0
    def test_gaussian_copula2d(self):
        """Check that the best-matching Gaussian copula has the highest log-likelihood.

        Two bivariate normal samples are drawn with correlation 0.5 and 0.7.
        The test asserts that:

        * on the 0.5 data, the copula without an explicit ``cov`` beats the
          one given the 0.7 covariance;
        * on the 0.7 data, the copula given the 0.7 covariance beats the one
          given the 0.5 covariance;
        * on the 0.5 data, the Gaussian copula beats the Gumbel, Clayton,
          Frank and Student copulas.
        """
        dimkeys = ["solar", "wind"]
        ourmean = [2, 3]
        rho = 0.5
        rho2 = 0.7
        ourcov = [[1, rho], [rho, 1]]
        ourcov2 = [[1, rho2], [rho2, 1]]
        marginals = {"solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
                     "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1])}

        # Draw order is kept so a seeded run produces the same data as before.
        data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
        data_array2 = np.random.multivariate_normal(ourmean, ourcov2, 100000)

        # Column i of each sample belongs to dimkeys[i].
        data_dict = {key: data_array[:, i] for i, key in enumerate(dimkeys)}
        data_dict2 = {key: data_array2[:, i] for i, key in enumerate(dimkeys)}

        gumbel = GumbelCopula(dimkeys, data_dict, marginals)
        frank = FrankCopula(dimkeys, data_dict, marginals)
        clayton = ClaytonCopula(dimkeys, data_dict, marginals)
        student = StudentCopula(dimkeys, data_dict, marginals)

        multigaussian1 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict, marginals=marginals, quadstep=0.001)
        multigaussian2 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict, marginals=marginals, quadstep=0.001,
                                        cov=ourcov2)
        multigaussian3 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2, marginals=marginals, quadstep=0.001,
                                        cov=ourcov2)
        multigaussian4 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2, marginals=marginals, quadstep=0.001,
                                        cov=ourcov)

        # Computed once because it is the reference in five comparisons.
        l1 = multigaussian1.c_log_likelihood()
        self.assertGreater(l1, multigaussian2.c_log_likelihood())
        self.assertGreater(multigaussian3.c_log_likelihood(), multigaussian4.c_log_likelihood())
        self.assertGreater(l1, gumbel.c_log_likelihood())
        self.assertGreater(l1, clayton.c_log_likelihood())
        self.assertGreater(l1, frank.c_log_likelihood())
        self.assertGreater(l1, student.c_log_likelihood())
Example #13
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from copula import GaussianCopula
from mpl_toolkits.mplot3d import Axes3D
from pycopula.visualization import pdf_2d, cdf_2d

# A bivariate Gaussian copula (previously mislabelled as Clayton)
gaussian = GaussianCopula(dim=2)

# Evaluate the CDF (C) and the PDF (c) on a 2-D grid
u, v, C = cdf_2d(gaussian)
u, v, c = pdf_2d(gaussian)

# Plotting
fig = plt.figure()
X, Y = np.meshgrid(u, v)

# Left panel: the CDF surface.  The earlier version drew the PDF `c` here
# under a "CDF" title and never used `C`.  A CDF lies in [0, 1].
ax = fig.add_subplot(121, projection='3d', title="Gaussian copula CDF")
ax.set_zlim(0, 1)
ax.plot_surface(X, Y, C, cmap=cm.Blues)
ax.plot_wireframe(X, Y, C, color='black', alpha=0.3)

# Right panel: contour lines of the PDF
ax = fig.add_subplot(122, title="Gaussian copula PDF")
ax.contour(X, Y, c, levels=np.arange(0, 5, 0.15))

plt.show()