def run_test(vtree_filename, sdd_filename, seed=0, enum_models=0):
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        prior = UniformSmoothing(2.0)
        #prior.initialize_psdd(beta)
        Prior.random_parameters(beta, seed=seed)

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    if beta.vtree.var_count <= 10:
        print(beta.as_table())

    # most probable explanation; normalize unless the underlying SDD is false
    mpe_val, mpe_inst = beta.mpe()
    mpe_val = mpe_val if beta.is_false_sdd else mpe_val/beta.theta_sum
    print("mpe: %s %.8f" % (mpe_inst, mpe_val))

    if enum_models:
        models = []
        with Timer("enumerating %d models" % enum_models):
            for model in beta.enumerate_mpe(pmanager):
                models.append(model)
                if len(models) >= enum_models:
                    break
        for model in models[:10]:
            print(model)
        print("%d models (%d max)" % (len(models), 10))
        """
        with Timer("evaluating %d models" % enum_models):
            for model in models:
                if not alpha.is_model(model):
                    print("error: non-model", model)
                if not alpha._is_bits_and_data_clear(): # random check
                    print("error: bits or data not clear")
        """

    return beta, pmanager
def run_test(vtree_filename, sdd_filename,
             print_models=10, count_models=100, enum_models=0):
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    with Timer("counting %d models" % count_models):
        for i in range(count_models):
            alpha.model_count(vtree)

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))

    if print_models:
        models = []
        with Timer("enumerating models"):
            for model in alpha.models(vtree, lexical=True):
                st = "".join(str(val) for var, val in model)
                models.append(st)
                if len(models) >= print_models:
                    break
        for model in models:
            print(model)
        print("%d models (%d max)" % (len(models), print_models))

    if enum_models:
        models = []
        with Timer("enumerating %d models" % enum_models):
            for model in alpha.models(vtree):
                models.append(model)
                if len(models) >= enum_models:
                    break
        with Timer("evaluating %d models" % enum_models):
            for model in models:
                if not alpha.is_model(model):
                    print("error: non-model", model)
                if not alpha._is_bits_and_data_clear(): # random check
                    print("error: bits or data not clear")

    return alpha, manager
def run_test(vtree_filename, sdd_filename, seed=0, enum_models=0):
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to two psdds"):
        pmanager1 = PSddManager(vtree)
        pmanager2 = PSddManager(vtree)
        beta = pmanager1.copy_and_normalize_sdd(alpha, vtree)
        gamma = pmanager2.copy_and_normalize_sdd(alpha, vtree)
        #prior = DirichletPrior(2.0)
        prior = UniformSmoothing(1.0)
        prior.initialize_psdd(beta)
        Prior.random_parameters(gamma, seed=(seed+1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("=== beta ===")
        print(beta.as_table())
        print("=== gamma ===")
        print(gamma.as_table())
        print("=== end ===")
        print("brute force:")
        print("kl(beta,gamma) = %.8g" % beta.kl_psdd_brute_force(gamma))
        print("kl(gamma,beta) = %.8g" % gamma.kl_psdd_brute_force(beta))
        print("kl(beta,beta) = %.8g" % beta.kl_psdd_brute_force(beta))
        print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_brute_force(gamma))

    print("compute:")
    print("kl(beta,gamma) = %.8g" % beta.kl_psdd(gamma))
    print("kl(gamma,beta) = %.8g" % gamma.kl_psdd(beta))
    print("kl(beta,beta) = %.8g" % beta.kl_psdd(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd(gamma))

    print("compute:")
    print("kl(beta,gamma) = %.8g" % beta.kl_psdd_alt(gamma))
    print("kl(gamma,beta) = %.8g" % gamma.kl_psdd_alt(beta))
    print("kl(beta,beta) = %.8g" % beta.kl_psdd_alt(beta))
    print("kl(gamma,gamma) = %.8g" % gamma.kl_psdd_alt(gamma))

    ess = 2.0
    prior = UniformSmoothing(ess)
    print("log prior (ess=%.8f,mc=%d):" % (ess, beta.model_count()))
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("method 1 = %.8g" % prior.log_prior_brute_force(beta))
    print("method 2 = %.8g" % prior.log_prior(beta))

    return beta, pmanager1
def run_test(vtree_filename, sdd_filename, N=1024, seed=0,
             print_models=True, test_learning=True):
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)

    if test_learning:
        # SIMULATE DATASETS
        with Timer("simulating datasets"):
            #prior = DirichletPrior(2.0)
            prior = UniformSmoothing(1024.0)
            prior.initialize_psdd(beta)
            training = DataSet.simulate(beta, N, seed=seed)
            testing = DataSet.simulate(beta, N, seed=(seed+1))

        # LEARN A PSDD
        with Timer("learning complete data"):
            beta.learn(training, prior)
        with Timer("evaluate log likelihood"):
            train_ll = beta.log_likelihood(training)/training.N
            test_ll = beta.log_likelihood(testing)/testing.N

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    if test_learning:
        print("================================")
        print("   training size: %d" % training.N)
        print("    testing size: %d" % testing.N)
        print(" unique training: %d" % len(training))
        print("  unique testing: %d" % len(testing))
        print("================================")
        print("     training ll: %.8f" % train_ll)
        print("      testing ll: %.8f" % test_ll)
        print("================================")
        print(training)

    # check p(a) + p(~a) against the unconditioned value
    value = beta.value()
    print("================================")
    print("      p(T) value: %.8f" % beta.value())
    e_inst = Inst.from_literal(1, pmanager.var_count)
    pval = beta.value(evidence=e_inst)
    e_inst = Inst.from_literal(-1, pmanager.var_count)
    nval = beta.value(evidence=e_inst)
    print("p(a)+p(~a) value: %.8f" % (pval + nval))
    print("      p(a) value: %.8f" % pval)
    print("     p(~a) value: %.8f" % nval)
    if value:
        print("     probability: %.8f" % beta.probability(evidence=e_inst))

    # for each variable, check that p(X=1) + p(X=0) matches the unconditioned value
    var_marginals = beta.marginals()
    value = var_marginals[0]
    check = True
    for var in range(1, pmanager.var_count+1):
        e_inst = Inst.from_literal(var, pmanager.var_count)
        pval = beta.value(evidence=e_inst)
        e_inst = Inst.from_literal(-var, pmanager.var_count)
        nval = beta.value(evidence=e_inst)
        if abs(pval + nval - value) > 1e-8:
            check = False
    assert check
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))

    # same check, under evidence on the first and last variables
    inst = InstMap()
    inst[1] = 1
    inst[pmanager.var_count] = 0
    var_marginals = beta.marginals(evidence=inst)
    value = var_marginals[0]
    check = True
    for var in range(2, pmanager.var_count):
        inst[var] = 1
        pval = beta.value(evidence=inst)
        inst[var] = 0
        nval = beta.value(evidence=inst)
        del inst[var]
        if abs(pval + nval - value) > 1e-8:
            check = False
    assert check
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))

    return beta, pmanager
def run_test(vtree_filename, sdd_filename, N=1024, seed=0):
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        #prior = DirichletPrior(2.0)
        prior = UniformSmoothing(1024.0)
        prior.initialize_psdd(beta)

    # SIMULATE DATASETS
    # the following blocks time different ways of drawing samples; each block
    # overwrites training/testing, and only the final DataSet version is used
    with Timer("drawing samples"):
        random.seed(seed)
        training, testing = [], []
        for i in range(N):
            training.append(beta.simulate())
        for i in range(N):
            testing.append(beta.simulate())

    # SIMULATE DATASETS
    with Timer("drawing samples (into dict)"):
        random.seed(seed)
        training, testing = defaultdict(lambda: 1), defaultdict(lambda: 1)
        for i in range(N):
            training[tuple(beta.simulate())] += 1
        for i in range(N):
            testing[tuple(beta.simulate())] += 1

    # SIMULATE DATASETS
    with Timer("drawing samples new (list)"):
        random.seed(seed)
        training, testing = [], []
        for i in range(N):
            inst = [None]*(manager.var_count+1)
            training.append(beta.simulate(inst=inst))
        for i in range(N):
            inst = [None]*(manager.var_count+1)
            testing.append(beta.simulate(inst=inst))

    # SIMULATE DATASETS
    """
    with Timer("drawing samples new (map)"):
        random.seed(seed)
        training, testing = [], []
        for i in range(N):
            training.append(beta.simulate())
        for i in range(N):
            testing.append(beta.simulate())
    """

    # SIMULATE DATASETS
    with Timer("simulating datasets"):
        training = DataSet.simulate(beta, N, seed=seed)
        testing = DataSet.simulate(beta, N, seed=(seed+1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))
    print("================================")
    print("   training size: %d" % training.N)
    print("    testing size: %d" % testing.N)
    print(" unique training: %d" % len(training))
    print("  unique testing: %d" % len(testing))

    if manager.var_count <= PSddNode._brute_force_limit:
        pass

    return beta, manager
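# Minimal usage sketch (an assumption, not part of the original test scripts):
# each run_test variant above expects a matching vtree/SDD file pair. The file
# paths below are hypothetical placeholders; substitute your own.
if __name__ == '__main__':
    vtree_filename = "examples/example.vtree"  # hypothetical path
    sdd_filename = "examples/example.sdd"      # hypothetical path
    run_test(vtree_filename, sdd_filename)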