def generate_dirichlet_data(ndag, size):
    """Draw a sample from a continuous Bayesian network with Dirichlet copulas.

    For every node index, a local copula is taken from an ``ot.Dirichlet``
    distribution built with ``d + 1`` parameters ``(1 + j) / (d + 1)`` for
    ``j = 0..d``, where ``d`` is one plus the number of parents of the node.
    Every marginal is ``Uniform(0, 1)``.

    Parameters
    ----------
    ndag
        Graph object providing ``getTopologicalOrder()``, ``getParents(node)``
        and ``getSize()`` (presumably an otagrum ``NamedDAG`` -- confirm
        against the callers).
    size
        Number of realizations requested from ``getSample``.

    Returns
    -------
    The sample returned by ``ContinuousBayesianNetwork.getSample(size)``.
    """
    # The topological order is only used for its length; nodes are addressed
    # by their plain index below.
    node_count = ndag.getTopologicalOrder().getSize()

    copulas = []
    for node in range(node_count):
        local_dim = ndag.getParents(node).getSize() + 1
        theta = [(1.0 + j) / (local_dim + 1) for j in range(local_dim + 1)]
        copulas.append(ot.Dirichlet(theta).getCopula())

    # The same Uniform instance is repeated for every node, as in the
    # list-multiplication form.
    marginals = [ot.Uniform(0., 1.)] * ndag.getSize()
    cbn = otagr.ContinuousBayesianNetwork(ndag, marginals, copulas)
    return cbn.getSample(size)
def generate_dirichlet_data(ndag, size):
    """Draw a sample from a Bayesian network built from Dirichlet copulas.

    One local distribution is created per node index: the copula of an
    ``ot.Dirichlet`` whose ``d + 1`` parameters are ``(1 + j) / (d + 1)``,
    ``j = 0..d``, with ``d`` equal to one plus the number of parents of the
    node. The network is built from the DAG and these local distributions
    only (two-argument constructor).

    Parameters
    ----------
    ndag
        Graph object providing ``getTopologicalOrder()`` and
        ``getParents(node)`` (presumably an otagrum ``NamedDAG`` -- confirm
        against the callers).
    size
        Number of realizations requested from ``getSample``.

    Returns
    -------
    The sample returned by ``ContinuousBayesianNetwork.getSample(size)``.
    """
    node_count = ndag.getTopologicalOrder().getSize()

    # Lazily yields the local dimension (node + parents) of each node index.
    local_dims = (
        1 + ndag.getParents(node).getSize() for node in range(node_count)
    )
    jointDistributions = [
        ot.Dirichlet(
            [(1.0 + j) / (dim + 1) for j in range(dim + 1)]
        ).getCopula()
        for dim in local_dims
    ]

    network = otagr.ContinuousBayesianNetwork(ndag, jointDistributions)
    return network.getSample(size)
def generate_gaussian_data(ndag, size, r=0.8):
    """Draw a sample from a continuous Bayesian network with normal copulas.

    Each node index gets an ``ot.NormalCopula`` whose correlation matrix has
    dimension one plus the number of parents of the node and whose entries
    below the diagonal are all set to ``r``. Every marginal is
    ``Uniform(0, 1)``.

    Parameters
    ----------
    ndag
        Graph object providing ``getTopologicalOrder()``, ``getParents(node)``
        and ``getSize()`` (presumably an otagrum ``NamedDAG`` -- confirm
        against the callers).
    size
        Number of realizations requested from ``getSample``.
    r
        Common correlation coefficient written into each local correlation
        matrix (default 0.8).

    Returns
    -------
    The sample returned by ``ContinuousBayesianNetwork.getSample(size)``.
    """
    node_count = ndag.getTopologicalOrder().getSize()

    copulas = []
    for node in range(node_count):
        local_dim = ndag.getParents(node).getSize() + 1
        corr = ot.CorrelationMatrix(local_dim)
        # Row 0 has no entry below the diagonal, so start at row 1.
        for row in range(1, local_dim):
            for col in range(row):
                corr[row, col] = r
        copulas.append(ot.NormalCopula(corr))

    marginals = [ot.Uniform(0., 1.)] * ndag.getSize()
    cbn = otagr.ContinuousBayesianNetwork(ndag, marginals, copulas)
    return cbn.getSample(size)
def generate_student_data(ndag, size, r=0.8):
    """Draw a sample from a Bayesian network built from Student copulas.

    Each node index gets the copula of an ``ot.Student`` distribution built
    with first argument ``5.0``, a zero vector, a vector of ones and a
    correlation matrix whose entries below the diagonal are all ``r``. The
    local dimension is one plus the number of parents of the node. The
    network is built from the DAG and these local distributions only.

    Parameters
    ----------
    ndag
        Graph object providing ``getTopologicalOrder()`` and
        ``getParents(node)`` (presumably an otagrum ``NamedDAG`` -- confirm
        against the callers).
    size
        Number of realizations requested from ``getSample``.
    r
        Common correlation coefficient written into each local correlation
        matrix (default 0.8).

    Returns
    -------
    The sample returned by ``ContinuousBayesianNetwork.getSample(size)``.
    """

    def local_copula(node):
        # Copula over the node and its parents with uniform correlation r.
        dim = ndag.getParents(node).getSize() + 1
        corr = ot.CorrelationMatrix(dim)
        for row in range(1, dim):
            for col in range(row):
                corr[row, col] = r
        student = ot.Student(5.0, [0.0] * dim, [1.0] * dim, corr)
        return student.getCopula()

    node_count = ndag.getTopologicalOrder().getSize()
    jointDistributions = [local_copula(node) for node in range(node_count)]

    network = otagr.ContinuousBayesianNetwork(ndag, jointDistributions)
    return network.getSample(size)
# Script fragment: builds a continuous Bayesian network over `ndag` with
# Uniform(0, 1) marginals and Student-based local copulas, prints it, draws
# two samples and exports them (and the PDF values) to CSV files.
# `ndag` and `order` are defined before this fragment.

# One Uniform(0, 1) marginal per entry of `order`.
marginals = [ot.Uniform(0.0, 1.0) for i in range(order.getSize())]
# One local copula per node index; its dimension is 1 + number of parents.
copulas = list()
for i in range(order.getSize()):
    d = 1 + ndag.getParents(i).getSize()
    print("i=", i, ", d=", d)
    if d == 1:
        # No parents: one-dimensional independent copula.
        copulas.append(ot.IndependentCopula(1))
    else:
        # Correlation matrix with every entry below the diagonal set to 0.5 / d.
        R = ot.CorrelationMatrix(d)
        # NOTE(review): this inner loop reuses the name `i` of the enclosing
        # loop. The outer `i` is not read again in this iteration, so the
        # result is unaffected, but a distinct name would be clearer.
        for i in range(d):
            for j in range(i):
                R[i, j] = 0.5 / d
        # Copula of a Student distribution (first argument 5.0, presumably
        # the degrees of freedom -- confirm against the OpenTURNS API).
        copulas.append(
            ot.Student(5.0, [0.0] * d, [1.0] * d, R).getCopula())
cbn = otagrum.ContinuousBayesianNetwork(ndag, marginals, copulas)
print("cbn=", cbn)
# NOTE(review): `d` is whatever the last loop iteration left behind (the local
# dimension of the last node), not necessarily the dimension of `cbn` --
# confirm that the evaluation point has the intended length.
print("cbn pdf=", cbn.computePDF([0.5] * d))
print("cbn realization=", cbn.getRealization())
size = 300
# Two independent draws of the same size, exported as comma-separated files.
sampleLearn = cbn.getSample(size)
sample = cbn.getSample(size)
sampleLearn.exportToCSVFile("samplelearn.csv", ",")
sample.exportToCSVFile("sample.csv", ",")
print("cbn sample=", sample)
logL = 0.0
# PDF evaluated on the whole second sample, also exported to CSV.
pdfSample = cbn.computePDF(sample)
pdfSample.exportToCSVFile("pdfSample.csv", ",")
# The body of this loop lies beyond the visible part of the file.
for i in range(size):
# Local marginals: one Uniform(0, 1) per node of the structure
m_list = [ot.Uniform(0.0, 1.0) for _ in range(structure.getSize())]

# Local Conditional Copulas: for each node, the copula of a Normal
# distribution over the node and its parents, with every entry below the
# diagonal of the correlation matrix set to 0.6
lcc_list = []
for node in range(structure.getSize()):
    dim_lcc = 1 + structure.getParents(node).getSize()
    corr = ot.CorrelationMatrix(dim_lcc)
    for row in range(dim_lcc):
        for col in range(row):
            corr[row, col] = 0.6
    gaussian = ot.Normal([0.0] * dim_lcc, [1.0] * dim_lcc, corr)
    lcc_list.append(gaussian.getCopula())

# %%
# Now that we have a NamedDAG structure and a collection of local conditional copulas, we can construct a CBN.

# %%
cbn = otagrum.ContinuousBayesianNetwork(structure, m_list, lcc_list)

# %%
# Having a CBN, we can now sample from it.

# %%
ot.RandomGenerator.SetSeed(10)  # Set random seed
sample = cbn.getSample(1000)
# Hold out the last 100 realizations for testing; the rest is for training.
train = sample[:-100]
test = sample[-100:]

# %%
# Learning the structure with continuous PC:
# Now that we have data, we can use it to learn the structure with the continuous PC algorithm.

# %%
# NOTE(review): this fragment starts inside enclosing statements that are not
# visible here (`train`, `test`, `alpha`, `ndag`, `order`,
# `list_loglikelihoods`, `Loglikelihoods`, `n_restart` and `n_samples` come
# from outside). The nesting of everything up to the two `append` calls must
# be confirmed against the full file.
TTest = otagr.ContinuousTTest(train, alpha)

# One local distribution per node index: Uniform(0, 1) when the node has no
# parents, otherwise an empirical Bernstein copula fitted on the training
# columns (node first, then its parents).
jointDistributions = []
for node in range(order.getSize()):
    parents = ndag.getParents(node)
    dim = parents.getSize() + 1
    if dim == 1:
        jointDistributions.append(ot.Uniform(0.0, 1.0))
        continue
    bin_number = TTest.GetK(len(train), dim)
    columns = [node] + [int(p) for p in parents]
    jointDistributions.append(
        ot.EmpiricalBernsteinCopula(
            train.getMarginal(columns), bin_number, False))

cbn = otagr.ContinuousBayesianNetwork(ndag, jointDistributions)

# Mean log-density of the fitted network over the test points.
ll = sum(cbn.computeLogPDF(t) for t in test) / len(test)
list_loglikelihoods.append(ll)
Loglikelihoods.append(list_loglikelihoods)

# Transposing result matrix
Loglikelihoods = np.array(
    np.reshape(Loglikelihoods, (n_restart, n_samples)).transpose(),
    dtype=float)
# Row-wise statistics kept as column vectors of shape (rows, 1).
ll_mean = np.mean(Loglikelihoods, axis=1, keepdims=True)
ll_std = np.std(Loglikelihoods, axis=1, keepdims=True)