Example #1
0
def run_test(vtree_filename,sdd_filename,seed=0,enum_models=0):
    """Load an SDD, copy it into a randomly parameterised PSDD and report on it.

    The vtree and SDD are read from disk, the SDD is copied into a fresh
    PSDD manager, random parameters are assigned, and size statistics and
    the MPE are printed.  Optionally a number of models are enumerated.

    All output uses single-argument ``print(...)`` calls, which behave the
    same under Python 2 and Python 3.

    Args:
        vtree_filename: path handed to ``Vtree.read``.
        sdd_filename: path handed to ``io.sdd_read``.
        seed: seed forwarded to ``Prior.random_parameters``.
        enum_models: when truthy, enumerate models via
            ``beta.enumerate_mpe`` and stop once this many are collected.

    Returns:
        The tuple ``(beta, pmanager)``: the PSDD and its manager.
    """
    # At most this many enumerated models are echoed to stdout.
    max_shown = 10

    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename,manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha,vtree)
        Prior.random_parameters(beta,seed=seed)

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    # The full table is only printed for PSDDs over at most 10 variables.
    if beta.vtree.var_count <= 10:
        print(beta.as_table())

    mpe_val,mpe_inst = beta.mpe()
    # The raw MPE value is divided by theta_sum unless beta is flagged as
    # the false SDD, in which case it is reported unchanged.
    if not beta.is_false_sdd:
        mpe_val = mpe_val/beta.theta_sum
    print("mpe: %s %.8f" % (mpe_inst,mpe_val))

    if enum_models:
        models = []
        with Timer("enumerating %d models" % enum_models):
            for model in beta.enumerate_mpe(pmanager):
                models.append(model)
                if len(models) >= enum_models:
                    break

        for model in models[:max_shown]:
            print(model)
        print("%d models (%d max)" % (len(models),max_shown))

        # Disabled sanity check, kept for reference:
        # with Timer("evaluating %d models" % enum_models):
        #     for model in models:
        #         if not alpha.is_model(model):
        #             print("error: non-model", model)
        #         if not alpha._is_bits_and_data_clear(): # random check
        #             print("error: bits or data not clear")

    return beta,pmanager
Example #2
0
def run_test(vtree_filename, sdd_filename,
             print_models=10, count_models=100, enum_models=0):
    """Read an SDD, time repeated model counting and optionally list models.

    Args:
        vtree_filename: path handed to ``Vtree.read``.
        sdd_filename: path handed to ``io.sdd_read``.
        print_models: when truthy, collect models (requested with
            ``lexical=True``) until this many are gathered and print each
            one as a string of its values.
        count_models: how many times ``alpha.model_count`` is invoked
            inside the timed counting section.
        enum_models: when truthy, collect models until this many are
            gathered and run ``alpha.is_model`` on each of them.

    Returns:
        The tuple ``(alpha, manager)``: the SDD and its manager.
    """
    # Load the vtree, build a manager over it and read the SDD into it.
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # The counts computed here are discarded; only the elapsed time matters.
    with Timer("counting %d models" % count_models):
        for _ in range(count_models):
            alpha.model_count(vtree)

    # Summary statistics of the SDD.
    print("================================")
    model_count = alpha.model_count(vtree)
    print(" sdd model count: %s" % fmt(model_count))
    node_count = alpha.count()
    print("       sdd count: %s" % fmt(node_count))
    sdd_size = alpha.size()
    print("        sdd size: %s" % fmt(sdd_size))

    if print_models:
        rendered = []
        with Timer("enumerating models"):
            for model in alpha.models(vtree, lexical=True):
                # Each model is iterated as (variable, value) pairs; only
                # the values are kept, concatenated into one string.
                rendered.append("".join(str(val) for _, val in model))
                if len(rendered) >= print_models:
                    break
        for line in rendered:
            print(line)
        print("%d models (%d max)" % (len(rendered), print_models))

    if enum_models:
        collected = []

        with Timer("enumerating %d models" % enum_models):
            for model in alpha.models(vtree):
                collected.append(model)
                if len(collected) >= enum_models:
                    break

        with Timer("evaluating %d models" % enum_models):
            for candidate in collected:
                if not alpha.is_model(candidate):
                    print("error: non-model", candidate)
                # random check
                if not alpha._is_bits_and_data_clear():
                    print("error: bits or data not clear")

    return alpha, manager
Example #3
0
def run_test(vtree_filename,sdd_filename,seed=0,enum_models=0):
    """Build two PSDDs from one SDD and print KL divergences and a log prior.

    ``beta`` is initialised through ``UniformSmoothing(1.0)`` and ``gamma``
    receives random parameters seeded with ``seed + 1``.  The KL
    divergences between them are printed using ``kl_psdd`` and
    ``kl_psdd_alt`` and, when the variable count does not exceed
    ``PSddNode._brute_force_limit``, also ``kl_psdd_brute_force``.

    Args:
        vtree_filename: path handed to ``Vtree.read``.
        sdd_filename: path handed to ``io.sdd_read``.
        seed: base seed; ``seed + 1`` is forwarded to
            ``Prior.random_parameters``.
        enum_models: accepted but not used by this function.

    Returns:
        The tuple ``(beta, pmanager1)``.
    """
    # Load the vtree and the SDD defined over it.
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename,manager)

    # Copy the same SDD into two independent PSDD managers.
    with Timer("converting to two psdds"):
        pmanager1 = PSddManager(vtree)
        pmanager2 = PSddManager(vtree)
        beta  = pmanager1.copy_and_normalize_sdd(alpha,vtree)
        gamma = pmanager2.copy_and_normalize_sdd(alpha,vtree)
        # alternative prior: DirichletPrior(2.0)
        init_prior = UniformSmoothing(1.0)
        init_prior.initialize_psdd(beta)
        Prior.random_parameters(gamma,seed=(seed+1))

    # Summary statistics.
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    # Every KL report covers the same four (left, right) combinations in
    # the same order; only the method invoked on `left` differs.
    kl_cases = (
        ("kl(beta,gamma)  = %.8g", beta, gamma),
        ("kl(gamma,beta)  = %.8g", gamma, beta),
        ("kl(beta,beta)   = %.8g", beta, beta),
        ("kl(gamma,gamma) = %.8g", gamma, gamma),
    )

    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("=== beta ===")
        print(beta.as_table())
        print("=== gamma ===")
        print(gamma.as_table())
        print("=== end ===")

        print("brute force:")
        for template, left, right in kl_cases:
            print(template % left.kl_psdd_brute_force(right))

    print("compute:")
    for template, left, right in kl_cases:
        print(template % left.kl_psdd(right))

    print("compute:")
    for template, left, right in kl_cases:
        print(template % left.kl_psdd_alt(right))

    # Log prior of beta under a second smoothing prior.
    ess = 2.0
    eval_prior = UniformSmoothing(ess)
    print("log prior (ess=%.8f,mc=%d):" % (ess,beta.model_count()))
    if beta.vtree.var_count <= PSddNode._brute_force_limit:
        print("method 1 = %.8g" % eval_prior.log_prior_brute_force(beta))
    print("method 2 = %.8g" % eval_prior.log_prior(beta))

    return beta,pmanager1
Example #4
0
def run_test(vtree_filename,
             sdd_filename,
             N=1024,
             seed=0,
             print_models=True,
             test_learning=True):
    """Convert an SDD to a PSDD, optionally learn it, and check its marginals.

    The function reads the vtree and SDD, copies the SDD into a PSDD and,
    when ``test_learning`` is set, simulates training/testing data from the
    PSDD, re-learns its parameters and reports average log likelihoods.
    It then prints the PSDD value with and without evidence and performs
    two marginal consistency checks.

    All output uses single-argument ``print(...)`` calls, which behave the
    same under Python 2 and Python 3.

    Args:
        vtree_filename: path handed to ``Vtree.read``.
        sdd_filename: path handed to ``io.sdd_read``.
        N: number of examples simulated for each of the two datasets.
        seed: seed for the training set; ``seed + 1`` is used for testing.
        print_models: accepted but not used by this function.
        test_learning: when truthy, run the simulate/learn/evaluate steps.

    Returns:
        The tuple ``(beta, pmanager)``: the PSDD and its manager.

    Raises:
        AssertionError: if either marginal consistency check fails.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)

    if test_learning:
        # SIMULATE DATASETS
        with Timer("simulating datasets"):
            # alternative prior: DirichletPrior(2.0)
            prior = UniformSmoothing(1024.0)
            prior.initialize_psdd(beta)
            training = DataSet.simulate(beta, N, seed=seed)
            testing = DataSet.simulate(beta, N, seed=(seed + 1))

        # LEARN A PSDD
        with Timer("learning complete data"):
            beta.learn(training, prior)

        # Per-example log likelihood on both datasets.
        with Timer("evaluate log likelihood"):
            train_ll = beta.log_likelihood(training) / training.N
            test_ll = beta.log_likelihood(testing) / testing.N

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))

    if test_learning:
        print("================================")
        print("   training size: %d" % training.N)
        print("    testing size: %d" % testing.N)
        print(" unique training: %d" % len(training))
        print("  unique testing: %d" % len(testing))
        print("================================")
        print("     training ll: %.8f" % train_ll)
        print("      testing ll: %.8f" % test_ll)

        print("================================")
        print(training)

    # Value of the PSDD without evidence.
    value = beta.value()
    print("================================")
    print("      p(T) value: %.8f" % value)

    # Value under evidence on literal 1 and on literal -1.
    e_inst = Inst.from_literal(1, pmanager.var_count)
    pval = beta.value(evidence=e_inst)
    e_inst = Inst.from_literal(-1, pmanager.var_count)
    nval = beta.value(evidence=e_inst)
    print("p(a)+p(~a) value: %.8f" % (pval + nval))
    print("      p(a) value: %.8f" % pval)
    print("     p(~a) value: %.8f" % nval)
    if value:
        # e_inst still holds the evidence built from literal -1 here.
        print("     probability: %.8f" % beta.probability(evidence=e_inst))

    # Check 1 (no evidence): for every variable, the values under its
    # positive and negative literal must add up to var_marginals[0].
    var_marginals = beta.marginals()
    value = var_marginals[0]
    check = True
    for var in range(1, pmanager.var_count + 1):
        # Use the loop variable so that each variable is checked, not just
        # variable 1 on every iteration.
        e_inst = Inst.from_literal(var, pmanager.var_count)
        pval = beta.value(evidence=e_inst)
        e_inst = Inst.from_literal(-var, pmanager.var_count)
        nval = beta.value(evidence=e_inst)
        if abs(pval + nval - value) > 1e-8:
            check = False
    # Report before asserting so a failure is visible in the output.
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    assert check

    # Check 2 (with evidence): fix the first variable to 1 and the last to
    # 0, then run the same test on each variable in between.
    inst = InstMap()
    inst[1] = 1
    inst[pmanager.var_count] = 0
    var_marginals = beta.marginals(evidence=inst)
    value = var_marginals[0]
    check = True
    for var in range(2, pmanager.var_count):
        inst[var] = 1
        pval = beta.value(evidence=inst)
        inst[var] = 0
        nval = beta.value(evidence=inst)
        # Restore the evidence before moving to the next variable.
        del inst[var]
        if abs(pval + nval - value) > 1e-8:
            check = False
    print(" marginals check: %s" % ("ok" if check else "NOT OK"))
    assert check

    return beta, pmanager
Example #5
0
def run_test(vtree_filename, sdd_filename, N=1024, seed=0):
    """Time several ways of drawing samples from a PSDD and print statistics.

    The PSDD is built from the SDD on disk and initialised through
    ``UniformSmoothing(1024.0)``.  Three hand-written sampling loops are
    timed (plain lists, tallies in a dict, lists with a caller-supplied
    ``inst`` buffer); their results are discarded.  The datasets finally
    reported come from ``DataSet.simulate``.

    Args:
        vtree_filename: path handed to ``Vtree.read``.
        sdd_filename: path handed to ``io.sdd_read``.
        N: number of samples drawn per dataset in every timed section.
        seed: seed for ``random.seed`` in the hand-written loops and for
            the training set; ``seed + 1`` is used for the testing set.

    Returns:
        The tuple ``(beta, manager)``: the PSDD and the SDD manager.
    """
    # READ SDD
    with Timer("reading vtree and sdd"):
        vtree = Vtree.read(vtree_filename)
        manager = SddManager(vtree)
        alpha = io.sdd_read(sdd_filename, manager)

    # CONVERT TO PSDD
    with Timer("converting to psdd"):
        pmanager = PSddManager(vtree)
        beta = pmanager.copy_and_normalize_sdd(alpha, vtree)
        # alternative prior: DirichletPrior(2.0)
        prior = UniformSmoothing(1024.0)
        prior.initialize_psdd(beta)

    # TIMING: samples appended to plain lists
    with Timer("drawing samples"):
        random.seed(seed)
        training = [beta.simulate() for _ in range(N)]
        testing = [beta.simulate() for _ in range(N)]

    # TIMING: samples tallied in dictionaries keyed by the sample tuple
    with Timer("drawing samples (into dict)"):
        random.seed(seed)
        # Tallies start at zero so each entry equals the number of times
        # that sample was drawn.
        training, testing = defaultdict(int), defaultdict(int)
        for _ in range(N):
            training[tuple(beta.simulate())] += 1
        for _ in range(N):
            testing[tuple(beta.simulate())] += 1

    # TIMING: samples written into a freshly allocated list per draw
    with Timer("drawing samples new (list)"):
        random.seed(seed)
        inst_size = manager.var_count + 1
        training = [beta.simulate(inst=[None] * inst_size)
                    for _ in range(N)]
        testing = [beta.simulate(inst=[None] * inst_size)
                   for _ in range(N)]

    # SIMULATE DATASETS (these are the ones reported below)
    with Timer("simulating datasets"):
        training = DataSet.simulate(beta, N, seed=seed)
        testing = DataSet.simulate(beta, N, seed=(seed + 1))

    # PRINT SOME STATS
    print("================================")
    print(" sdd model count: %s" % fmt(alpha.model_count(vtree)))
    print("       sdd count: %s" % fmt(alpha.count()))
    print("        sdd size: %s" % fmt(alpha.size()))
    print("================================")
    print("psdd model count: %s" % fmt(beta.model_count()))
    print("      psdd count: %s" % fmt(beta.count()))
    print("       psdd size: %s" % fmt(beta.size()))
    print("================================")
    print("     theta count: %s" % fmt(beta.theta_count()))
    print("      zero count: %s" % fmt(beta.zero_count()))
    print("      true count: %s" % fmt(beta.true_count()))
    print("================================")
    print("   training size: %d" % training.N)
    print("    testing size: %d" % testing.N)
    print(" unique training: %d" % len(training))
    print("  unique testing: %d" % len(testing))

    # NOTE(review): this returns the SDD manager rather than pmanager;
    # kept as-is for callers -- confirm that this is intended.
    return beta, manager