def test_log_vector_histogram():
    """Compare compiled CPU log-likelihoods of a small histogram SPN with SPFlow.

    Builds a Sum over two Products of Histogram leaves (scopes 0 and 1),
    draws 30 random binary samples, and checks the result of
    ``CPUCompiler(maxTaskSize=5).log_likelihood`` against SPFlow's
    ``log_likelihood``.

    Returns:
        ``0`` when ``CPUCompiler.isVectorizationSupported()`` is false (the
        test is skipped with a printed notice); otherwise ``None``.
    """
    # Leaves of the minimal SPN.
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=0)
    h2 = Histogram([0., 1., 2.], [0.45, 0.55], [1, 1], scope=1)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=0)
    h4 = Histogram([0., 1., 2.], [0.875, 0.125], [1, 1], scope=1)

    # Two products mixed by a single sum node.
    left_product = Product(children=[h1, h2])
    right_product = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [left_product, right_product])

    # One random binary column per feature, drawn in scope order.
    feature0 = np.random.randint(2, size=30)
    feature1 = np.random.randint(2, size=30)
    inputs = np.column_stack((feature0, feature1)).astype("float64")

    # NOTE(review): recent pytest versions warn when a test returns a
    # non-None value; confirm whether the `0` is consumed anywhere.
    if not CPUCompiler.isVectorizationSupported():
        print("Test not supported by the compiler installation")
        return 0

    # Run the compiled kernel.
    compiler = CPUCompiler(maxTaskSize=5)
    results = compiler.log_likelihood(spn, inputs, supportMarginal=False)

    # Reference values from SPFlow's own inference, flattened to one
    # value per sample.
    reference = log_likelihood(spn, inputs).reshape(30)

    # Accept a match in log space; only if that fails, fall back to
    # comparing in normal space.
    matches_in_log_space = np.all(np.isclose(results, reference))
    assert matches_in_log_space or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
def test_cpu_histogram():
    """Compare compiled log-likelihoods of a histogram SPN with SPFlow, with NaNs.

    Builds a Sum over two Products of Histogram leaves, draws 30 random
    samples, overwrites five randomly chosen entries with NaN, and checks
    ``CUDACompiler().log_likelihood`` against SPFlow's ``log_likelihood``.

    NOTE(review): despite "cpu" in the name, this test drives
    ``CUDACompiler`` -- confirm the name is intentional.

    Returns:
        ``0`` when ``CUDACompiler.isAvailable()`` is false (the test is
        skipped with a printed notice); otherwise ``None``.
    """
    # Leaves of the minimal SPN.
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=0)
    # NOTE(review): three densities but only two repr points here -- verify.
    h2 = Histogram([0., 3., 6., 8.], [0.35, 0.1, 0.55], [1, 1], scope=1)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=0)
    h4 = Histogram([0., 5., 8.], [0.875, 0.125], [1, 1], scope=1)

    # Two products mixed by a single sum node.
    left_product = Product(children=[h1, h2])
    right_product = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [left_product, right_product])

    # Feature 0 takes values in {0, 1}; feature 1 takes values in {0, ..., 7}.
    feature0 = np.random.randint(2, size=30)
    feature1 = np.random.randint(8, size=30)
    inputs = np.column_stack((feature0, feature1)).astype("float64")

    # Insert some NaN in random places into the input data.
    nan_positions = np.random.choice(inputs.size, 5, replace=False)
    inputs.ravel()[nan_positions] = np.nan

    if not CUDACompiler.isAvailable():
        print("Test not supported by the compiler installation")
        return 0

    # Run the compiled kernel.
    compiler = CUDACompiler()
    results = compiler.log_likelihood(spn, inputs)

    # Reference values from SPFlow's own inference, flattened to one
    # value per sample.
    reference = log_likelihood(spn, inputs).reshape(30)

    # Accept a match in log space; only if that fails, fall back to
    # comparing in normal space.
    matches_in_log_space = np.all(np.isclose(results, reference))
    assert matches_in_log_space or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
def _deserialize_histogram(self, node, node_map):
    """Rebuild a ``Histogram`` leaf from its serialized representation.

    Args:
        node: Serialized node; its ``hist`` member supplies ``breaks``,
            ``densities``, ``binReprPoints``, ``scope``, ``type`` and
            ``metaType``, and its ``id`` is copied onto the result.
        node_map: Not used by this method.

    Returns:
        The reconstructed ``Histogram`` with ``id`` set to ``node.id``.
    """
    serialized = node.hist
    histogram = Histogram(
        breaks=serialized.breaks,
        densities=serialized.densities,
        bin_repr_points=serialized.binReprPoints,
        scope=serialized.scope,
        # Map the serialized enum values through the lookup tables.
        type_=enum2Type.get(serialized.type),
        meta_type=enum2MetaType.get(serialized.metaType),
    )
    histogram.id = node.id
    return histogram
def __init__(self, breaks, densities, bin_repr_points, scope=None, type_=None, meta_type=MetaType.UTILITY):
    """Initialise the node by delegating to ``Histogram.__init__``.

    The node has the same member variables as a histogram, so every
    argument is forwarded to the ``Histogram`` initialiser.

    Args:
        breaks: Forwarded to ``Histogram.__init__``.
        densities: Forwarded to ``Histogram.__init__``.
        bin_repr_points: Forwarded to ``Histogram.__init__``.
        scope: Forwarded to ``Histogram.__init__``; defaults to ``None``.
        type_: Forwarded to ``Histogram.__init__``; defaults to ``None``.
        meta_type: Forwarded to ``Histogram.__init__``; defaults to
            ``MetaType.UTILITY``.
    """
    # Forward the caller's type_/meta_type. They used to be accepted but
    # silently replaced by the hard-coded defaults.
    Histogram.__init__(self, breaks, densities, bin_repr_points, scope,
                       type_=type_, meta_type=meta_type)
def histogram_tree_to_spn(tree, features, obj_type, tree_to_spn):
    """Build a ``Histogram`` leaf from a parsed tree node.

    Args:
        tree: Parse-tree node. ``children[0]`` is the feature name,
            ``children[1].children`` the breaks and
            ``children[2].children`` the densities (each converted with
            ``float``).
        features: Sequence of feature names; the position of the node's
            feature in it becomes the leaf's scope entry.
        obj_type: Not used by this function.
        tree_to_spn: Not used by this function.

    Returns:
        The new ``Histogram`` with the feature index appended to its scope.
    """
    breaks = [float(token) for token in tree.children[1].children]
    densities = [float(token) for token in tree.children[2].children]
    leaf = Histogram(breaks, densities)

    feature_name = str(tree.children[0])
    leaf.scope.append(features.index(feature_name))
    return leaf
def test_binary_serialization_roundtrip(tmpdir):
    """Round-trip a small SPN query through the binary (de)serializer.

    A ``JointProbability`` query over a simple histogram SPN is written to
    ``test.bin`` inside ``tmpdir`` and read back. The query attributes and
    the graph structure (node counts per type, edge count) must be the
    same before and after.
    """
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=1)
    h2 = Histogram([0., 1., 2.], [0.45, 0.55], [1, 1], scope=2)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=1)
    h4 = Histogram([0., 1., 2.], [0.875, 0.125], [1, 1], scope=2)
    left_product = Product(children=[h1, h2])
    right_product = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [left_product, right_product])

    model = SPNModel(spn, featureValueType="uint32")
    query = JointProbability(model)

    binary_file = os.path.join(tmpdir, "test.bin")
    print(f"Test binary file: {binary_file}")

    BinarySerializer(binary_file).serialize_to_file(query)
    restored_query = BinaryDeserializer(binary_file).deserialize_from_file()

    # Query-level attributes must survive the round trip.
    assert isinstance(restored_query, JointProbability)
    assert restored_query.batchSize == query.batchSize
    assert restored_query.errorModel.error == query.errorModel.error
    assert restored_query.errorModel.kind == query.errorModel.kind
    assert restored_query.graph.featureType == model.featureType
    assert restored_query.graph.name == model.name

    # Graph structure must be the same: total nodes, nodes per type, edges.
    restored_root = restored_query.graph.root
    assert get_number_of_nodes(spn) == get_number_of_nodes(restored_root)
    for node_type in (Sum, Product, Histogram):
        assert get_number_of_nodes(spn, node_type) == get_number_of_nodes(
            restored_root, node_type)
    assert get_number_of_edges(spn) == get_number_of_edges(restored_root)
def histogram_tree_to_spn(tree, features, obj_type, tree_to_spn):
    """Build a ``Histogram`` leaf from a parsed tree node.

    Args:
        tree: Parse-tree node. ``children[0]`` is the feature name;
            ``children[1]``, ``children[2]`` and ``children[3]`` hold the
            breaks, densities and bin representative points, each parsed
            with ``ast.literal_eval``.
        features: Sequence of feature names, or ``None``. When given, the
            position of the node's feature in it becomes the scope entry;
            when ``None``, the scope entry is the integer formed by the
            feature name without its first character.
        obj_type: Not used by this function.
        tree_to_spn: Not used by this function.

    Returns:
        The new ``Histogram`` with one index appended to its scope.
    """
    breaks = [ast.literal_eval(token) for token in tree.children[1].children]
    densities = [ast.literal_eval(token) for token in tree.children[2].children]
    repr_points = [ast.literal_eval(token) for token in tree.children[3].children]
    leaf = Histogram(breaks, densities, repr_points)

    feature_name = str(tree.children[0])
    if features is None:
        # No feature list: derive the index from the name itself by
        # dropping its first character.
        scope_index = int(feature_name[1:])
    else:
        scope_index = features.index(feature_name)
    leaf.scope.append(scope_index)
    return leaf
def test_vector_slp_escaping_users():
    """Check a vectorized, batch-size-1 CPU kernel against SPFlow sample by sample.

    Builds an SPN over 28 features (12 Gaussian, 8 Histogram and
    8 Categorical leaves) in which some inner nodes are shared between
    several parents, draws 100 random samples, and compares the compiled
    log-likelihood of each sample with SPFlow's reference value.

    Raises:
        AssertionError: On the first sample whose compiled result is not
            close to the reference.
    """

    def mix(first, second):
        # Sum node over two children with equal weights.
        return Sum(children=[first, second], weights=[0.5, 0.5])

    def join(first, second):
        # Product node over two children.
        return Product(children=[first, second])

    # Gaussian leaves on scopes 0..11, as (mean, stdev) pairs.
    gaussian_params = [
        (0.00, 1), (0.01, 0.75), (0.02, 0.5), (0.03, 0.25),
        (0.04, 1), (0.05, 0.25), (0.06, 0.5), (0.07, 0.75),
        (0.08, 1), (0.09, 0.75), (0.10, 1), (0.11, 1),
    ]
    g = [Gaussian(mean=mean, stdev=stdev, scope=scope)
         for scope, (mean, stdev) in enumerate(gaussian_params)]

    # Histogram leaves on scopes 12..19, as (density of bin 0, density of bin 1).
    histogram_densities = [
        (0.1, 0.9), (0.2, 0.8), (0.3, 0.7), (0.4, 0.6),
        (0.5, 0.5), (0.6, 0.4), (0.7, 0.3), (0.8, 0.2),
    ]
    h = [Histogram([0., 1., 2.], [low, high], [1, 1], scope=12 + offset)
         for offset, (low, high) in enumerate(histogram_densities)]

    # Categorical leaves on scopes 20..27.
    categorical_probs = [
        (0.1, 0.1, 0.8), (0.2, 0.2, 0.6), (0.3, 0.3, 0.4), (0.4, 0.4, 0.2),
        (0.5, 0.4, 0.1), (0.6, 0.3, 0.1), (0.7, 0.2, 0.1), (0.8, 0.1, 0.1),
    ]
    c = [Categorical(p=list(probs), scope=20 + offset)
         for offset, probs in enumerate(categorical_probs)]

    # Inner nodes, created in the same order as the flat definition.
    s0 = mix(g[8], h[4])
    s1 = mix(g[9], h[5])
    s2 = mix(g[10], c[6])
    s3 = mix(g[11], h[7])
    s4 = mix(s0, c[4])
    s5 = mix(s1, c[5])
    s6 = mix(s2, g[6])
    s7 = mix(s3, c[7])
    s8 = mix(s4, g[4])
    s9 = mix(s5, g[5])
    s10 = mix(s6, h[6])
    s11 = mix(s7, g[7])
    p0 = join(h[0], s8)
    p1 = join(c[1], s9)
    p2 = join(c[2], s10)
    p3 = join(g[3], s11)
    p4 = join(p0, g[0])
    p5 = join(p1, g[1])
    p6 = join(p2, h[2])
    p7 = join(p3, c[3])
    p8 = join(p4, c[0])
    p9 = join(p5, h[1])
    p10 = join(p6, g[2])
    p11 = join(p7, h[3])
    s12 = mix(p8, p9)
    s13 = mix(p10, p11)
    # p2 and s2 are referenced a second time here, giving them more than
    # one parent.
    s14 = mix(s12, p2)
    s15 = mix(s13, s2)
    spn = join(s14, s15)

    # Randomly sample input values, one column per feature in scope order.
    num_samples = 100
    gaussian_sampling = [
        (0.5, 1), (0.125, 0.25), (0.345, 0.24), (0.456, 0.1),
        (0.94, 0.48), (0.56, 0.42), (0.76, 0.14), (0.32, 0.58),
        (0.58, 0.219), (0.14, 0.52), (0.24, 0.42), (0.34, 0.1),
    ]
    # gaussian
    columns = [np.random.normal(loc=loc, scale=scale, size=num_samples)
               for loc, scale in gaussian_sampling]
    # histogram
    columns += [np.random.randint(low=0, high=2, size=num_samples)
                for _ in range(8)]
    # categorical
    columns += [np.random.randint(low=0, high=3, size=num_samples)
                for _ in range(8)]
    inputs = np.column_stack(columns).astype("float64")

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs).reshape(num_samples)

    # Compile the kernel with batch size 1 to enable SLP vectorization.
    compiler = CPUCompiler(vectorize=True,
                           computeInLogSpace=True,
                           vectorLibrary="LIBMVEC")
    kernel = compiler.compile_ll(spn=spn, batchSize=1, supportMarginal=False)

    # Execute the compiled kernel one sample at a time, timing only the
    # execute call itself.
    time_sum = 0
    for i, sample in enumerate(inputs):
        start = time.time()
        result = compiler.execute(kernel, inputs=np.array([sample]))
        time_sum = time_sum + time.time() - start

        # Progress line, overwritten in place by the next iteration.
        print(
            f"evaluation #{i}: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}",
            end='\r')

        # Check the computation result against the reference.
        if np.isclose(result, reference[i]):
            continue
        print(
            f"\nevaluation #{i} failed: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}"
        )
        raise AssertionError()

    print(f"\nExecution of {len(reference)} samples took {time_sum} seconds.")