def test_gaussian_copula(self):
    """Run the rank-histogram diagnostic on two Gaussian copulas.

    Two independent samples are drawn from the same bivariate normal
    (means 2 and 3, unit variances, correlation 0.5). A ``GaussianCopula``
    is built from each sample, ``n`` draws are taken from the second
    copula with ``generates_U`` and handed to ``diag(2).rank_histogram``
    together with the first copula.

    The test contains no assertions; it only exercises the code path.
    """
    n = 10000  # number of copula draws passed to the rank histogram
    sample_size = 100000  # size of each synthetic training sample
    dimkeys = ["solar", "wind"]
    ourmean = [2, 3]
    rho = 0.5
    rho2 = 0.5
    ourcov = [[1, rho], [rho, 1]]
    ourcov2 = [[1, rho2], [rho2, 1]]
    marginals = {
        "solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
        "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, sample_size)
    data_array2 = np.random.multivariate_normal(ourmean, ourcov2, sample_size)
    # One column of the sample per dimension key.
    data_dict = {key: data_array[:, col] for col, key in enumerate(dimkeys)}
    data_dict2 = {key: data_array2[:, col] for col, key in enumerate(dimkeys)}

    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)

    rank_data = multigaussian2.generates_U(n)
    diag(2).rank_histogram(rank_data, 20, multigaussian1)
def test_gaussian_copula(self):
    """Print ``emd_sort`` results for samples with correlation 0.1 and 0.9.

    Two bivariate normal samples are drawn (same means and unit variances,
    correlations 0.1 and 0.9), a ``GaussianCopula`` is built from each, and
    ``emd_sort`` is printed for the three sample pairings. The test is
    marked as unfinished and makes no assertions.
    """
    # not finished yet
    print("Warning test not finished yet")
    sample_size = 100000
    dimkeys = ["solar", "wind"]
    ourmean = [2, 3]
    rho = 0.1
    rho2 = 0.9
    ourcov = [[1, rho], [rho, 1]]
    ourcov2 = [[1, rho2], [rho2, 1]]
    marginals = {
        "solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
        "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, sample_size)
    data_array2 = np.random.multivariate_normal(ourmean, ourcov2, sample_size)
    # One column of the sample per dimension key.
    data_dict = {key: data_array[:, col] for col, key in enumerate(dimkeys)}
    data_dict2 = {key: data_array2[:, col] for col, key in enumerate(dimkeys)}

    # The copulas are not used below; constructing them is kept so the
    # test still exercises the constructors on both samples.
    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = GaussianCopula(input_data=data_dict2, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)

    print(emd_sort(data_array, data_array))
    print(emd_sort(data_array2, data_array))
    print(emd_sort(data_array2, data_array2))
def test_weighted_combined_copula3d(self):
    """Compare a weighted student/gaussian mixture with its pure components.

    On a three-dimensional normal sample the test checks that:

    * a ``WeightedCombinedCopula`` made only of gaussian copulas has the
      same ``c_log_likelihood`` as a plain ``GaussianCopula`` (7 places),
    * the same holds for student copulas and ``StudentCopula``,
    * the mixed copula's log-likelihood lies strictly between the student
      and the gaussian one.
    """
    dimkeys = ["solar", "wind", "tide"]
    ourmean = [0, 0, 0]
    ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]
    # Each marginal takes its variance from the covariance diagonal.
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[pos][pos], mean=ourmean[pos])
        for pos, key in enumerate(dimkeys)
    }

    sample = np.random.multivariate_normal(ourmean, ourcov, 10000)
    data_dict = {key: sample[:, pos] for pos, key in enumerate(dimkeys)}

    weights = [0.12, 0.88]
    mixed_names = ['student-copula', 'gaussian-copula']
    gaussian_names = ['gaussian-copula', 'gaussian-copula']
    student_names = ['student-copula', 'student-copula']

    mixed = WeightedCombinedCopula(dimkeys, data_dict, marginals, mixed_names, weights)
    gaussian = GaussianCopula(dimkeys, data_dict, marginals)
    all_gaussian = WeightedCombinedCopula(dimkeys, data_dict, marginals, gaussian_names, weights)
    all_student = WeightedCombinedCopula(dimkeys, data_dict, marginals, student_names, weights)
    student = StudentCopula(dimkeys, data_dict, marginals)

    gaussian_ll = gaussian.c_log_likelihood()
    student_ll = student.c_log_likelihood()
    mixed_ll = mixed.c_log_likelihood()

    self.assertAlmostEqual(all_gaussian.c_log_likelihood(), gaussian_ll, places=7)
    self.assertAlmostEqual(all_student.c_log_likelihood(), student_ll, places=7)
    self.assertGreater(gaussian_ll, mixed_ll)
    self.assertGreater(mixed_ll, student_ll)
def test_with_gaussian_copula_3_dim(self):
    """Check a 3-d ``GaussianCopula`` against ``MultiNormalDistribution``.

    Both models are built from the same trivariate normal sample and must
    give the same ``rect_prob`` over the box [-1, 1]^3 to two decimal
    places.
    """
    dimkeys = ["solar", "wind", "tide"]
    ourmean = [0, 0, 0]
    ourcov = [[1, 0.1, 0.3], [0.1, 2, 0], [0.3, 0, 3]]
    marginals = {
        "solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
        "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
        "tide": UnivariateNormalDistribution(var=ourcov[2][2], mean=ourmean[2]),
    }
    lowerdict = {"solar": -1, "wind": -1, "tide": -1}
    upperdict = {"solar": 1, "wind": 1, "tide": 1}

    data_array = np.random.multivariate_normal(ourmean, ourcov, 1000)
    # One column of the sample per dimension key. The original loop also
    # built a GaussianCopula with an undefined `pair_copulae_strings`
    # argument on every iteration, which raised NameError; that stray
    # call is removed.
    data_dict = {key: data_array[:, col] for col, key in enumerate(dimkeys)}

    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.1)
    multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)

    # Agreement to 2 places implies agreement to 1 place, so the former
    # second, looser assertion was redundant and is dropped.
    self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                           multigaussian2.rect_prob(lowerdict, upperdict), 2)
def predict(self, data, num_samples=500):
    """Sample labels for the non-training entries, conditioned on the rest.

    A ``GaussianCopula`` is built from ``self.get_cov(data)``. The training
    labels are mapped through ``self.cdf`` (clamped to
    ``[eps, 1 - eps]``), the copula is sampled conditionally on them for
    the remaining indices, and the samples are mapped back with
    ``self.icdf``.

    Args:
        data: Object providing ``train_mask`` and ``y``; it is also passed
            to ``self.get_cov`` and ``self.forward``.
        num_samples: Single entry of the ``sample_shape`` given to
            ``conditional_sample``.

    Returns:
        A clone of ``data.y`` whose entries outside ``train_mask`` are
        overwritten with the sampled values.
    """
    train_mask = data.train_mask
    # XOR with an all-True mask yields the complement of the training mask.
    all_true = torch.ones_like(train_mask).to(dtype=torch.bool)
    held_out_mask = torch.logical_xor(all_true, train_mask)

    def principal_block(matrix, mask):
        # Keep only the rows and columns selected by `mask`.
        return (matrix[mask, :])[:, mask]

    cov = self.get_cov(data)
    logits = self.forward(data)
    copula = GaussianCopula(cov)

    train_marginal = self.marginal(logits[train_mask],
                                   principal_block(cov, train_mask))
    held_out_marginal = self.marginal(logits[held_out_mask],
                                      principal_block(cov, held_out_mask))

    low, high = self.eps, 1 - self.eps
    train_u = torch.clamp(self.cdf(train_marginal, data.y[train_mask]), low, high)

    held_out_u = copula.conditional_sample(
        cond_val=train_u,
        sample_shape=[num_samples],
        cond_idx=torch.where(train_mask)[0],
        sample_idx=torch.where(held_out_mask)[0],
    )
    held_out_u = torch.clamp(held_out_u, low, high)
    held_out_y = self.icdf(held_out_marginal, held_out_u)

    pred_y = data.y.clone()
    pred_y[held_out_mask] = held_out_y
    return pred_y
def test_with_gaussian_copula_1_dim(self):
    """Check a 1-d ``GaussianCopula`` against ``MultiNormalDistribution``.

    Both are built from the same 10000-point normal sample (mean 0,
    variance 2) and must give the same ``rect_prob`` over [-2, 1] to
    three decimal places.
    """
    mean, variance = 0, 2
    keys = ["solar"]
    lower = {"solar": -2}
    upper = {"solar": 1}

    draws = np.random.multivariate_normal([mean], [[variance]], 10000)
    data = {"solar": draws[:, 0]}
    # The marginal is estimated from the sample itself.
    marginals = {"solar": UnivariateNormalDistribution(input_data=draws[:, 0])}

    copula_model = GaussianCopula(input_data=data, dimkeys=keys, marginals=marginals)
    normal_model = MultiNormalDistribution(keys, input_data=data)

    self.assertAlmostEqual(
        copula_model.rect_prob(lower, upper),
        normal_model.rect_prob(lower, upper),
        places=3,
    )
def _copula(self, hidden):
    """Build a zero-location ``GaussianCopula`` from a hidden state.

    ``self.hidden_to_tril`` turns ``hidden`` into a matrix that is passed
    to ``GaussianCopula`` as ``scale_tril`` when ``self.decomp`` is
    ``"ldl"`` and as ``covariance_matrix`` when it is ``"cho"``.

    Raises:
        NotImplementedError: For any other value of ``self.decomp``.
    """
    matrix = self.hidden_to_tril(hidden, decomp=self.decomp)
    # Zero location with one row per leading entry of `matrix`, on the
    # same dtype and device.
    zeros = torch.zeros(
        (matrix.size(0), self.code_size),
        dtype=matrix.dtype,
        device=matrix.device,
    )

    # NOTE(review): "cho" feeds the matrix in as a covariance and "ldl" as
    # a lower-triangular scale -- confirm this matches hidden_to_tril.
    if self.decomp == "ldl":
        keyword = "scale_tril"
    elif self.decomp == "cho":
        keyword = "covariance_matrix"
    else:
        raise NotImplementedError
    return GaussianCopula(zeros, **{keyword: matrix})
def nll(self, data):
    """Return the negative log-likelihood per training label.

    The covariance from ``self.get_cov`` and the outputs of
    ``self.forward`` are restricted to ``data.train_mask``. The result is
    the sum of the copula term (``-log_prob`` of the labels mapped through
    ``self.cdf``) and the marginal term (``-sum`` of the marginal
    ``log_prob``), divided by the number of training labels.
    """
    mask = data.train_mask
    # Restrict the covariance to training rows, then training columns.
    train_cov = self.get_cov(data)[mask, :][:, mask]
    train_logits = self.forward(data)[mask]
    targets = data.y[mask]

    copula = GaussianCopula(train_cov)
    marginal = self.marginal(train_logits, train_cov)

    copula_term = -copula.log_prob(self.cdf(marginal, targets))
    marginal_term = -torch.sum(marginal.log_prob(targets))
    return (copula_term + marginal_term) / targets.size(0)
def forward(self, hidden, scale_tril):
    """Build a zero-location ``GaussianCopula`` for a batch of hidden states.

    Args:
        hidden: Sequence of tensors that ``torch.cat`` can join along
            dim 2 (presumably an RNN ``(h, c)`` pair -- TODO confirm).
        scale_tril: Passed unchanged as the second argument of
            ``GaussianCopula``.

    Returns:
        ``GaussianCopula(loc, scale_tril)`` where ``loc`` is a zero tensor
        of shape ``(batch_size, self.code_size)``.
    """
    # TODO: need to parameterize cholesky factor for gaussian copula
    h = torch.cat(hidden, dim=2).squeeze(0)
    # Take the batch size from the concatenated tensor: `hidden` must be a
    # sequence for torch.cat, so it has no usable `.size(0)` of its own.
    batch_size = h.size(0)

    # Not consumed yet (see TODO). The calls are kept so the layers still
    # run; presumably the bn* layers track batch statistics -- TODO confirm.
    mu = self.bnmu(self.fcmu(h))
    lv = self.bnlv(self.fclv(h))

    # Create `loc` alongside `h` so it does not default to the CPU.
    loc = torch.zeros(batch_size, self.code_size, dtype=h.dtype, device=h.device)
    return GaussianCopula(loc, scale_tril)
def test_quick_dim_2(self):
    """Check that C-vine, D-vine and Gaussian copulas agree in 2-d.

    All three models are built from the same bivariate normal sample and
    their ``C`` values at one point must agree pairwise to one decimal
    place. ``GaussianCopula.c`` is also called once at that point, with
    its result unused.
    """
    dimkeys = ["solar", "wind"]
    ourmean = [1, 0.5]
    ourcov = [[1, 0.3], [0.3, 2]]
    # Each marginal takes its variance from the covariance diagonal.
    marginals = {
        key: UnivariateNormalDistribution(var=ourcov[pos][pos], mean=ourmean[pos])
        for pos, key in enumerate(dimkeys)
    }

    sample = np.random.multivariate_normal(ourmean, ourcov, 10000)
    data_dict = {key: sample[:, pos] for pos, key in enumerate(dimkeys)}

    pair_copulae_strings = [[None, 'student-copula'], [None, None]]
    point = {"solar": 0.96, "wind": 0.87}

    c_vine = CVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    d_vine = DVineCopula(dimkeys, data_dict, marginals, pair_copulae_strings)
    gaussian = GaussianCopula(dimkeys, data_dict, marginals)
    gaussian.c(point)

    self.assertAlmostEqual(c_vine.C(point), d_vine.C(point), places=1)
    self.assertAlmostEqual(gaussian.C(point), d_vine.C(point), places=1)
    self.assertAlmostEqual(c_vine.C(point), gaussian.C(point), places=1)
def test_with_gaussian_copula_2_dim(self):
    """Check a 2-d ``GaussianCopula`` against ``MultiNormalDistribution``.

    Both models are built from the same bivariate normal sample (means 3
    and 4, unit variances, covariance 0.5) and must give the same
    ``rect_prob`` over [2, 4] x [3, 5] to three decimal places.
    """
    dimkeys = ["solar", "wind"]
    ourmean = [3, 4]
    ourcov = [[1, 0.5], [0.5, 1]]
    marginals = {
        "solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
        "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
    }
    lowerdict = {"solar": 2, "wind": 3}
    upperdict = {"solar": 4, "wind": 5}

    data_array = np.random.multivariate_normal(ourmean, ourcov, 100000)
    # One column of the sample per dimension key.
    data_dict = {key: data_array[:, col] for col, key in enumerate(dimkeys)}

    multigaussian1 = GaussianCopula(input_data=data_dict, dimkeys=dimkeys,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = MultiNormalDistribution(dimkeys, input_data=data_dict)

    self.assertAlmostEqual(multigaussian1.rect_prob(lowerdict, upperdict),
                           multigaussian2.rect_prob(lowerdict, upperdict), 3)
def test_gaussian_copula2d(self):
    """Check that the matching Gaussian copula scores the best likelihood.

    Two bivariate normal samples are drawn with correlation 0.5 and 0.7.
    The test asserts, via ``c_log_likelihood``, that:

    * on the 0.5 sample, a ``GaussianCopula`` built without an explicit
      ``cov`` beats one given the 0.7 covariance,
    * on the 0.7 sample, the copula given the 0.7 covariance beats the one
      given the 0.5 covariance,
    * on the 0.5 sample, the Gaussian copula beats the Gumbel, Clayton,
      Frank and Student copulas.
    """
    sample_size = 100000
    dimkeys = ["solar", "wind"]
    ourmean = [2, 3]
    rho = 0.5
    rho2 = 0.7
    ourcov = [[1, rho], [rho, 1]]
    ourcov2 = [[1, rho2], [rho2, 1]]
    marginals = {
        "solar": UnivariateNormalDistribution(var=ourcov[0][0], mean=ourmean[0]),
        "wind": UnivariateNormalDistribution(var=ourcov[1][1], mean=ourmean[1]),
    }

    data_array = np.random.multivariate_normal(ourmean, ourcov, sample_size)
    data_array2 = np.random.multivariate_normal(ourmean, ourcov2, sample_size)
    # One column of the sample per dimension key.
    data_dict = {key: data_array[:, col] for col, key in enumerate(dimkeys)}
    data_dict2 = {key: data_array2[:, col] for col, key in enumerate(dimkeys)}

    gumbel = GumbelCopula(dimkeys, data_dict, marginals)
    frank = FrankCopula(dimkeys, data_dict, marginals)
    clayton = ClaytonCopula(dimkeys, data_dict, marginals)
    student = StudentCopula(dimkeys, data_dict, marginals)

    multigaussian1 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict,
                                    marginals=marginals, quadstep=0.001)
    multigaussian2 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict,
                                    marginals=marginals, quadstep=0.001, cov=ourcov2)
    multigaussian3 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2,
                                    marginals=marginals, quadstep=0.001, cov=ourcov2)
    multigaussian4 = GaussianCopula(dimkeys=dimkeys, input_data=data_dict2,
                                    marginals=marginals, quadstep=0.001, cov=ourcov)

    # Reference likelihood reused by every comparison on the first sample.
    reference_ll = multigaussian1.c_log_likelihood()

    self.assertGreater(reference_ll, multigaussian2.c_log_likelihood())
    self.assertGreater(multigaussian3.c_log_likelihood(),
                       multigaussian4.c_log_likelihood())
    self.assertGreater(reference_ll, gumbel.c_log_likelihood())
    self.assertGreater(reference_ll, clayton.c_log_likelihood())
    self.assertGreater(reference_ll, frank.c_log_likelihood())
    self.assertGreater(reference_ll, student.c_log_likelihood())
"""Plot the CDF surface and the PDF contours of a 2-d Gaussian copula."""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from copula import GaussianCopula
# Not referenced by name; kept because older matplotlib releases need this
# import to register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from pycopula.visualization import pdf_2d, cdf_2d

# The Gaussian copula (previously mislabelled as a Clayton copula).
gaussian = GaussianCopula(dim=2)

# Evaluate the CDF and the PDF; each helper's own grid is kept so the
# surfaces are always drawn against the coordinates they were computed on.
u_cdf, v_cdf, C = cdf_2d(gaussian)
u_pdf, v_pdf, c = pdf_2d(gaussian)

fig = plt.figure()

# Left panel: CDF surface. The original drew the PDF `c` here under a
# "CDF" title and never used `C`.
ax = fig.add_subplot(121, projection='3d', title="Gaussian copula CDF")
X_cdf, Y_cdf = np.meshgrid(u_cdf, v_cdf)
ax.set_zlim(0, 1)  # a CDF takes values in [0, 1]
ax.plot_surface(X_cdf, Y_cdf, C, cmap=cm.Blues)
ax.plot_wireframe(X_cdf, Y_cdf, C, color='black', alpha=0.3)

# Right panel: PDF contour lines.
ax = fig.add_subplot(122, title="Gaussian copula PDF")
X_pdf, Y_pdf = np.meshgrid(u_pdf, v_pdf)
ax.contour(X_pdf, Y_pdf, c, levels=np.arange(0, 5, 0.15))

plt.show()