Example #1
0
def probs(spn, ranges):
    """Evaluate the SPN on a batch of ranges and return a flat probability vector.

    Wraps ``Inference.likelihood`` with the module's range-aware node handlers
    (``inference_support_ranges``) and collapses the (n, 1) result into a 1-D
    array of length ``len(ranges)``.
    """
    range_array = np.array(ranges)
    likelihoods = Inference.likelihood(spn,
                                       range_array,
                                       dtype=np.float64,
                                       node_likelihood=inference_support_ranges)
    return likelihoods.reshape(len(range_array))
Example #2
0
    def test_inference_results(self):
        """Check that the exported plain SPN matches the TF forward pass.

        Builds a random RAT-SPN, evaluates it with TensorFlow and with the
        extracted simple SPN nodes, and asserts the relative deviation of the
        two likelihoods stays below 1%.
        """
        np.random.seed(123)
        tf.set_random_seed(123)

        num_dims = 20

        # Random region graph: ten random splits (2 parts, depth 3).
        region = region_graph.RegionGraph(range(num_dims))
        for _ in range(10):
            region.random_split(2, 3)

        spn_args = RAT_SPN.SpnArgs()
        spn_args.normalized_sums = True
        model = RAT_SPN.RatSpn(10, region_graph=region, name="obj-spn",
                               args=spn_args)

        session = tf.Session()
        session.run(tf.global_variables_initializer())
        batch = np.random.normal(0.0, 1.2, [10, num_dims])
        placeholder = tf.placeholder(tf.float32, [10, num_dims])
        forward = model.forward(placeholder)
        tf_result = session.run(forward, feed_dict={placeholder: batch})

        # Evaluate the exported plain-Python SPN node by node.
        exported_nodes = model.get_simple_spn(session)
        plain_result = np.stack(
            [Inference.likelihood(node, batch) for node in exported_nodes])

        # TF returns log-likelihoods, so compare in linear space.
        ratio = plain_result / np.exp(tf_result)
        rel_error = np.abs(ratio - 1.0)

        self.assertTrue(np.all(rel_error < 1e-2))
Example #3
0
    # NOTE(review): fragment of a larger script — `args`, `rg`, `load_mnist`,
    # `train_spn`, `inference` and `softmax` are defined outside this excerpt.
    args.num_sums = 2
    args.num_gauss = 2
    spn = RAT_SPN.RatSpn(10, region_graph=rg, name="obj-spn", args=args)
    print("num_params", spn.num_params())

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Train briefly on MNIST, then compare the TF output against the
    # exported plain-Python SPN on the first five training images.
    (train_im, train_labels), _ = load_mnist()
    train_spn(spn, train_im, train_labels, num_epochs=3, sess=sess)

    # dummy_input = np.random.normal(0.0, 1.2, [10, 9])
    dummy_input = train_im[:5]
    input_ph = tf.placeholder(tf.float32, dummy_input.shape)
    output_tensor = spn.forward(input_ph)
    tf_output = sess.run(output_tensor, feed_dict={input_ph: dummy_input})

    # Per-output log-likelihoods from the exported simple SPN nodes.
    output_nodes = spn.get_simple_spn(sess)
    simple_output = []
    for node in output_nodes:
        simple_output.append(inference.log_likelihood(node, dummy_input)[:, 0])
    # graphics.plot_spn2(output_nodes[0])
    # graphics.plot_spn_to_svg(output_nodes[0])
    simple_output = np.stack(simple_output, axis=-1)
    print(tf_output, simple_output)
    # Compare class posteriors; the tiny epsilon guards the division below.
    simple_output = softmax(simple_output, axis=1)
    tf_output = softmax(tf_output, axis=1) + 1e-100
    print(tf_output, simple_output)
    relative_error = np.abs(simple_output / tf_output - 1)
    print(np.average(relative_error))
Example #4
0
@author: Alejandro Molina
'''

from spn.algorithms import Inference
from spn.algorithms.StructureLearning import learn_structure
from spn.algorithms.splitting.Clustering import get_split_rows_KMeans
from spn.algorithms.splitting.RDC import get_split_cols_RDC
from spn.data.datasets import get_nips_data
from spn.structure.Base import Context
from spn.structure.leaves.Histograms import add_domains, create_histogram_leaf

if __name__ == '__main__':
    import numpy as np

    # Load the NIPS word-count dataset.
    ds_name, words, data, train, _, statistical_type, _ = get_nips_data()

    print(words)

    print(data)

    # Treat every column as discrete so histogram leaves can be fit.
    ds_context = Context()
    ds_context.statistical_type = np.asarray(["discrete"] * data.shape[1])

    add_domains(data, ds_context)

    # Learn the SPN structure: KMeans row clustering, RDC column splitting,
    # histogram leaves.
    spn = learn_structure(data, ds_context, get_split_rows_KMeans(),
                          get_split_cols_RDC(), create_histogram_leaf)

    # print(to_str_equation(spn, words))
    likelihoods = Inference.likelihood(spn, data[:100, :])
    print(likelihoods)
def visualize_Density_2d(spn):
    """Plot 2-D density contours of *spn* over its first two features.

    Draws three contour plots on the [0, 1] x [0, 1] grid: the unconditioned
    density, the density conditioned on feature 5 == 0 ("Keine Epidemie"), and
    on feature 5 == 1 ("Epidemie").  Saves the figure to ``cdp.pdf`` and shows
    it.  Relies on the module-level ``spn_for_evidence`` for conditioning.
    """
    from spn.experiments.AQP.Ranges import NominalRange, NumericRange
    from spn.algorithms import Inference
    from simple_spn.InferenceRange import categorical_likelihood_range, gaussian_likelihood_range
    from simple_spn.UpdateRange import categorical_update_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical

    distribution_update_ranges = {
        Gaussian: None,
        Categorical: categorical_update_range
    }

    inference_support_ranges = {
        Gaussian: gaussian_likelihood_range,
        Categorical: categorical_likelihood_range,
        Sum: sum_likelihood,
        Product: prod_likelihood
    }

    import matplotlib.pyplot as plt
    _, axes = plt.subplots(1,
                           3,
                           figsize=(15, 10),
                           squeeze=False,
                           sharey=False,
                           sharex=True)
    x_vals = np.linspace(0, 1, num=50)
    y_vals = np.linspace(0, 1, num=50)
    X, Y = np.meshgrid(x_vals, y_vals)

    def _density_grid(node):
        # Evaluate the density of `node` over the 50x50 grid, row by row,
        # clamping values above 5 so the contour levels stay readable.
        rows = []
        for y_val in y_vals:
            print(y_val)
            ranges = []
            for x_val in x_vals:
                ranges.append([
                    NumericRange([[x_val]]),
                    NumericRange([[y_val]]), None, None, None, None
                ])

            densities = Inference.likelihood(
                node,
                data=np.array(ranges),
                dtype=np.float64,
                node_likelihood=inference_support_ranges)[:, 0]

            for i, d in enumerate(densities):
                if d > 5:
                    densities[i] = 5

            rows.append(densities)
        return np.array(rows)

    def _plot_panel(ax, node, title):
        # Contour-plot the density of `node` into the given axes.
        ax.contour(X, Y, _density_grid(node))
        ax.set_xlabel("Method1")
        ax.set_ylabel("Method2")
        ax.set_title(title)

    _plot_panel(axes[0][0], spn, "Overall")

    # Condition on "no alarm" (feature 5 == 0).
    evidence = [None, None, None, None, None, NominalRange([0])]
    prob_no_alarm, spn_no_alarm = spn_for_evidence(
        spn,
        evidence,
        node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_no_alarm)
    _plot_panel(axes[0][1], spn_no_alarm, "Keine Epidemie")

    # Condition on "alarm" (feature 5 == 1).
    evidence = [None, None, None, None, None, NominalRange([1])]
    prob_alarm, spn_alarm = spn_for_evidence(
        spn,
        evidence,
        node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_alarm)
    _plot_panel(axes[0][2], spn_alarm, "Epidemie")

    plt.savefig("cdp.pdf")

    plt.show()
def visualize_Density(spn):
    """Plot 1-D marginal densities of *spn* for features 0-4.

    For each of the five numeric features it draws the unconditioned marginal
    density plus the densities conditioned on feature 5 == 0 ("No Alarm") and
    feature 5 == 1 ("Alarm").  Saves the figure to ``pdp.pdf``, renders the SPN
    structure to ``pval.pdf`` and prints the heaviest nodes.  Relies on the
    module-level ``spn_for_evidence``, ``spn_util`` and ``get_nodes_with_weight``.
    """
    from spn.experiments.AQP.Ranges import NominalRange, NumericRange
    from spn.algorithms import Inference
    from simple_spn.InferenceRange import categorical_likelihood_range, gaussian_likelihood_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical
    from simple_spn.UpdateRange import categorical_update_range

    # One shared handler table is enough; the original rebuilt an identical
    # dict inside every loop iteration (and its initial dict with
    # `Gaussian: None` was shadowed before ever being used).
    inference_support_ranges = {
        Gaussian: gaussian_likelihood_range,
        Categorical: categorical_likelihood_range,
        Sum: sum_likelihood,
        Product: prod_likelihood
    }

    distribution_update_ranges = {
        Gaussian: None,
        Categorical: categorical_update_range
    }

    import matplotlib.pyplot as plt
    _, axes = plt.subplots(1,
                           5,
                           figsize=(15, 10),
                           squeeze=False,
                           sharey=False,
                           sharex=True)

    space_start = 0.00
    space_end = 1.0
    steps = 100
    max_y = 5

    x_vals = np.linspace(space_start, space_end, num=steps)

    def _marginal(node, i):
        # Density of `node` along feature `i`; all other features are left
        # unconstrained (None) and hence marginalized out.
        ranges = []
        for x_val in x_vals:
            ranges.append([None] * i + [NumericRange([[x_val]])] + [None] * (5 - i))

        return Inference.likelihood(
            node,
            data=np.array(ranges),
            dtype=np.float64,
            node_likelihood=inference_support_ranges)[:, 0]

    for i in range(5):
        axes[0][i].plot(x_vals, _marginal(spn, i))
        axes[0][i].set_title("Method " + str(i) + " All")
        axes[0][i].set_ylim([0, max_y])

    # Condition on "no alarm" (feature 5 == 0).
    evidence = [None, None, None, None, None, NominalRange([0])]
    prob_no_alarm, spn_no_alarm = spn_for_evidence(
        spn,
        evidence,
        node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_no_alarm)

    for i in range(5):
        axes[0][i].plot(x_vals, _marginal(spn_no_alarm, i), label="No Alarm", linestyle=":")

    # Condition on "alarm" (feature 5 == 1).
    evidence = [None, None, None, None, None, NominalRange([1])]
    prob_alarm, spn_alarm = spn_for_evidence(
        spn,
        evidence,
        node_likelihood=inference_support_ranges,
        distribution_update_ranges=distribution_update_ranges)
    print(prob_alarm)

    for i in range(5):
        axes[0][i].plot(x_vals, _marginal(spn_alarm, i), label="Alarm")

    plt.legend()
    plt.tight_layout()

    plt.savefig("pdp.pdf")

    plt.show()

    spn_util.plot_spn(spn, "pval.pdf")

    tmp = get_nodes_with_weight(spn, 5)

    # Print each node's P(X=1) alongside its weight in the SPN.
    for (weight, node) in tmp:
        print(str(round(node.p[1], 2)) + "\t" + str(weight))
Example #7
0
def probs_spflow(spn, data):
    """Return SPFlow likelihoods for *data* as a flat 1-D float64 array."""
    likelihoods = Inference.likelihood(spn, data, dtype=np.float64)
    return likelihoods.reshape(len(data))
Example #8
0
 
 
 # NOTE(review): excerpt of a larger script — PiecewiseLinear, Categorical,
 # IdentityNumeric, Sum, Product, the *_likelihood_range handlers and
 # root_node are all defined outside this excerpt.
 inference_support_ranges = {PiecewiseLinear : piecewise_likelihood_range, 
                             Categorical     : categorical_likelihood_range,
                             IdentityNumeric : identity_likelihood_range,
                             Sum             : sum_likelihood,
                             Product         : prod_likelihood}
 
 #Use None instead of np.nan
 ranges = np.array([[None, None, None],                                                          #Without any conditions
                    [NominalRange([0]), None, None],                                             #Only male
                    [NominalRange([0]), NominalRange([1]), None],                                #Only male and student
                    [NominalRange([0]), NominalRange([1]), NumericRange([[21,100]])],            #Only male and student and older than 21
                    [NominalRange([0]), NominalRange([1]), NumericRange([[10,15], [25,100]])]]   #Only male and student and age between 10 and 15 or 25 and 100
 )                  
 probabilities = Inference.likelihood(root_node, ranges, dtype=np.float64, node_likelihood=inference_support_ranges)
 
 print("Probabilities:")
 print(probabilities)
 print()
 
 
 
 #Sampling for given ranges
 from spn.algorithms import SamplingRange
 from spn.structure.leaves.piecewise.SamplingRange import sample_piecewise_node
 from spn.structure.leaves.parametric.SamplingRange import sample_categorical_node
 from spn.experiments.AQP.leaves.identity.SamplingRange import sample_identity_node
 
 # NOTE(review): this mapping is truncated in the excerpt (no closing brace).
 node_sample_support = {PiecewiseLinear : sample_piecewise_node,
                        Categorical     : sample_categorical_node,
Example #9
0
def extract_rules(spn, feature_id=1):
    """Print frequent item sets (and, in dead code, association rules) of *spn*.

    Computes frequent item sets via ``get_frequent_items``, scores each set
    with the SPN likelihood and prints a table — then calls ``exit()``, so the
    rule-mining code below never runs.

    NOTE(review): the unconditional ``exit()`` terminates the whole process;
    presumably a leftover debugging aid — confirm before reuse.  Relies on the
    externally defined ``get_frequent_items``, ``pd``, ``np`` and ``io``.
    """
    
    from spn.experiments.AQP.Ranges import NominalRange
    from spn.algorithms import Inference
    from simple_spn.internal.InferenceRange import categorical_likelihood_range
    from spn.structure.Base import Sum, Product
    from spn.algorithms.Inference import sum_likelihood, prod_likelihood
    from spn.structure.leaves.parametric.Parametric import Categorical
    
    inference_support_ranges = {Categorical     : categorical_likelihood_range,
                                    Sum             : sum_likelihood,
                                    Product         : prod_likelihood}
    
    
    
    
    
    freq_items = get_frequent_items(spn, min_support=0.0)
    freq_items_filtered = freq_items#filter(lambda x : any(cond[0] == feature_id for cond in x[1]), freq_items)
    freq_items_sorted = sorted(freq_items_filtered, key=lambda x: x[0], reverse=True)
    
    #evidence = numpy.empty((3,3,)
    
    
    # Maps feature index -> (short name, value labels); hard-coded for a
    # four-feature demo dataset.
    feature_dict = {0: ("g", ("m  ", "w  ")), 1: ("c", ("no ", "yes")), 2: ("s", ("no ", "yes")), 3: ("w", ("no ", "yes"))}
    freq_sets = []
    for (sup, conds) in freq_items_sorted:
        
        # Build a range query that fixes every condition of the item set.
        str_conds=[]
        ranges = [None] * len(spn.scope)
        for cond in conds:
            ranges[cond[0]] = NominalRange([cond[1]])
            str_conds.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            
        ranges = np.array([ranges])
        sup_spn = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        

        freq_sets.append(["(" + ", ".join(str_conds) + ")", sup, sup_spn]) 
        
        
    rules = sorted(freq_sets, key=lambda x : x[2], reverse=True)
    rule_df = pd.DataFrame(rules, columns=["frequent set", "s_support", "g_support"])
    
    io.print_pretty_table(rule_df.head(400))
    
    
    # NOTE(review): everything below is unreachable because of this exit().
    exit()
    
    
    
    
    rules = []
    for (sup, conds) in freq_items_sorted:
        
        rule_body = []
        rule_head = []
        conf = np.nan
        
        ranges = [None] * len(spn.scope)
        
        
        
        
        # Split conditions into head (the target feature) and body (the rest).
        for cond in conds:
            if cond[0] == feature_id:
                rule_head.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            else:
                rule_body.append(feature_dict[cond[0]][0] + "=" + feature_dict[cond[0]][1][cond[1]])
            
            ranges[cond[0]] = NominalRange([cond[1]])
        
        
        #Optimization possible
        ranges = np.array([ranges])
        prob_with_feature = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        
        ranges[0][feature_id] = None
        prob_without_feature = Inference.likelihood(spn, data=ranges, dtype=np.float64, node_likelihood=inference_support_ranges)[:,0][0]
        
        # Confidence of body -> head as a conditional probability.
        spn_sup = prob_without_feature
        spn_conf = prob_with_feature / prob_without_feature
        
        
        rules.append([" AND ".join(rule_body) + "-->" + " AND ".join(rule_head), sup, conf, spn_sup, spn_conf, spn_sup*spn_conf])
    
    
    rules = sorted(rules, key=lambda x : x[5], reverse=True)
    
    
     
    rule_df = pd.DataFrame(rules, columns=["Rule", "c_Support", "c_Confidence", "spn_Support", "spn_Confidence", "score"])
    
    #rule_df.drop_duplicates(subset=["Rule"], keep = True, inplace = True) 
    
    io.print_pretty_table(rule_df.head(400))
    

    
    pass
Example #10
0
        # NOTE(review): excerpt of a larger script — `myfile`, `eq`,
        # `str_to_spn`, `get_structure_stats`, `get_nips_data` and
        # `train_test_split` are defined outside this view.
        words = myfile.readline().strip()
        words = words[2:]
        words = words.split(';')

    # print(eq)
    print(words)

    # Rebuild the SPN from its textual equation representation.
    spn = str_to_spn(eq, words)

    print(get_structure_stats(spn))

    # print(Text.toJSON(spn))

    data = np.loadtxt("40_testdata.txt", delimiter=';')

    ll = Inference.likelihood(spn, data)

    print(ll)
    print("average LL", np.mean(ll))

    # Re-evaluate on a held-out split of the NIPS data (first 40 features).
    ds_name, words, data, _, _, _, _ = get_nips_data()

    top_n_features = 40

    train, test = train_test_split(data[:, 0:top_n_features],
                                   test_size=0.2,
                                   random_state=42)

    ll = Inference.likelihood(spn, test)
    print("average LL2", np.mean(ll))