Example #1
import numpy as np

# import paths assume SPFlow's package layout
from spn.algorithms.TransformStructure import Copy
from spn.structure.Base import Sum


def sum_condition(node, children, input_vals=None, scope=None):
    """Condition a Sum node on evidence over `scope`, given the already
    conditioned (child, log-probability) pairs in `children`."""
    if not scope.intersection(node.scope):
        # untouched by the evidence: keep a copy, contribute log(1) = 0
        return Copy(node), 0
    new_node = Sum()
    new_node.scope = list(set(node.scope) - scope)
    new_weights = []  # weights of children that still have scope left
    probs = []        # probability mass of children conditioned away entirely
    for i, c in enumerate(children):
        if c[0]:
            new_node.children.append(c[0])
            new_weights.append(node.weights[i] * np.exp(c[1]))
        else:
            probs.append(node.weights[i] * np.exp(c[1]))
    # renormalize the weights of the surviving children
    new_node.weights = [w / sum(new_weights) for w in new_weights]
    assert np.all(np.logical_not(np.isnan(
        new_node.weights))), 'Found nan weights'
    if not new_node.scope:
        return None, np.log(sum(probs))
    return new_node, np.log(sum(new_weights))
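The handler above is meant to run inside SPFlow's bottom-up graph evaluation; the hand-driven sketch below (a hypothetical example, not part of the original) shows its contract: each entry of `children` is a (conditioned child, log-probability) pair, and the Sum node keeps and renormalizes the surviving branches.

from spn.structure.leaves.parametric.Parametric import Gaussian

node = Sum()
node.scope = [0, 1]
node.weights = [0.3, 0.7]
node.children = [Gaussian(mean=0.0, stdev=1.0, scope=[0]),
                 Gaussian(mean=1.0, stdev=1.0, scope=[1])]

# pretend conditioning on X_0 removed the first child (only its mass remains)
# while the second child survived unchanged
children = [(None, -0.5), (node.children[1], 0.0)]
new_node, log_w = sum_condition(node, children, scope={0})
print(new_node.scope, np.exp(log_w))  # -> [1] 0.7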
Example #2
import numpy as np
import sklearn as sk
import sklearn.model_selection
from tqdm import tqdm

# SPFlow imports (paths assume SPFlow's package layout); get_data_in_window,
# load_spn and save_spn are project-specific helpers assumed to be in scope
from spn.algorithms.Inference import log_likelihood
from spn.algorithms.LearningWrappers import learn_parametric
from spn.structure.Base import Context, Sum, assign_ids
from spn.structure.StatisticalTypes import MetaType
from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
from spn.structure.leaves.parametric.Inference import add_parametric_inference_support
from spn.structure.leaves.parametric.Text import add_parametric_text_support


def train_spn(window_size=3,
              min_instances_slice=10000,
              features=None,
              number_of_classes=3):
    if features is None:
        features = [20, 120]

    add_parametric_inference_support()
    add_parametric_text_support()

    data = get_data_in_window(window_size=window_size,
                              features=features,
                              three_classes=number_of_classes == 3)

    # StratifiedShuffleSplit defaults to n_splits=10, so the loop below keeps
    # only the last split; stratification uses the center class pixel column
    sss = sk.model_selection.StratifiedShuffleSplit(test_size=0.2,
                                                    train_size=0.8,
                                                    random_state=42)
    for train_index, test_index in sss.split(
            data[:, 0:window_size * window_size * len(features)],
            data[:, (window_size * window_size * len(features)) +
                 (int(window_size * window_size / 2))]):
        X_train, X_test = data[train_index], data[test_index]

    context_list = list()
    parametric_list = list()
    number_of_features = len(features)
    for _ in range(number_of_features * window_size * window_size):
        context_list.append(MetaType.REAL)
        parametric_list.append(Gaussian)

    for _ in range(window_size * window_size):
        context_list.append(MetaType.DISCRETE)
        parametric_list.append(Categorical)

    ds_context = Context(meta_types=context_list)
    ds_context.add_domains(data)
    ds_context.parametric_types = parametric_list

    spn = load_spn(window_size, features, min_instances_slice,
                   number_of_classes)
    if spn is None:
        # one mixture branch per (class pixel, label) pair, weighted by the
        # number of training rows that carry that label at that pixel
        spn = Sum()
        for class_pixel in tqdm(range(-window_size * window_size, 0)):
            for label, count in zip(
                    *np.unique(data[:, class_pixel], return_counts=True)):
                train_data = X_train[X_train[:, class_pixel] == label, :]
                branch = learn_parametric(
                    train_data,
                    ds_context,
                    min_instances_slice=min_instances_slice)
                spn.children.append(branch)
                spn.weights.append(train_data.shape[0])

        spn.scope.extend(branch.scope)
        spn.weights = (np.array(spn.weights) / sum(spn.weights)).tolist()

        assign_ids(spn)
        save_spn(spn, window_size, features, min_instances_slice,
                 number_of_classes)

    res = np.ndarray((X_test.shape[0], number_of_classes))

    # column of the center class pixel: the last window_size**2 columns hold
    # the class pixels, so the center sits at this negative offset (-5 for
    # window_size=3, matching the stratification column used for the split)
    center = int(window_size ** 2 / 2) - window_size ** 2

    for i in tqdm(range(number_of_classes)):
        tmp = X_test.copy()
        tmp[:, center] = i
        res[:, i] = log_likelihood(spn, tmp)[:, 0]

    predicted_classes = np.argmax(res, axis=1).reshape((X_test.shape[0], 1))

    correct_predicted = 0
    for x, y in zip(X_test[:, center], predicted_classes):
        if x == y[0]:
            correct_predicted += 1
    accuracy = correct_predicted / X_test.shape[0]
    return spn, accuracy
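A hypothetical driver for the function above (the data and persistence helpers it calls are project-specific):

if __name__ == '__main__':
    # assumed entry point; parameter values mirror the function defaults
    spn, accuracy = train_spn(window_size=3,
                              min_instances_slice=10000,
                              features=[20, 120],
                              number_of_classes=3)
    print('test accuracy: {:.4f}'.format(accuracy))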
Example #3
# create an SPN over three random variables X_1, X_2, X_3
import numpy as np
from collections import OrderedDict

# remaining import paths assume SPFlow's package layout
from spn.structure.Base import Product, Sum
from spn.structure.StatisticalTypes import Type
from spn.structure.leaves.parametric.Parametric import Gamma, Gaussian
from spn.structure.leaves.parametric.Inference import add_parametric_inference_support
from spn.structure.leaves.typedleaves.Text import add_typed_leaves_text_support
from spn.structure.leaves.typedleaves.TypedLeaves import type_mixture_leaf_factory

add_typed_leaves_text_support()
add_parametric_inference_support()
#
# root is a sum
root = Sum()

#
# two product nodes
l_prod = Product()
r_prod = Product()
root.children = [l_prod, r_prod]
root.weights = np.array([0.75, 0.25])

#
# priors (not used further in this snippet)
pm_continuous_param_map = OrderedDict({
    Type.REAL:
    OrderedDict({Gaussian: {
        'params': {
            'mean': 5,
            'stdev': 5
        },
        'prior': None
    }}),
    Type.POSITIVE:
    OrderedDict({
        Gamma: {
            # the original snippet is cut off here; these hyperparameters are
            # placeholders mirroring the shape of the Gaussian entry above
            'params': {
                'alpha': 20,
                'beta': 5
            },
            'prior': None
        }
    }),
})
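The snippet ends before any leaves are attached to the two products. One minimal way to finish and evaluate the structure, assuming plain Gaussian leaves instead of the typed-leaf factory the original presumably goes on to use:

from spn.algorithms.Inference import log_likelihood
from spn.structure.Base import assign_ids

# hypothetical completion: three Gaussian leaves per product branch
l_prod.children = [Gaussian(mean=0, stdev=1, scope=[0]),
                   Gaussian(mean=1, stdev=1, scope=[1]),
                   Gaussian(mean=2, stdev=1, scope=[2])]
r_prod.children = [Gaussian(mean=5, stdev=2, scope=[0]),
                   Gaussian(mean=5, stdev=2, scope=[1]),
                   Gaussian(mean=5, stdev=2, scope=[2])]
l_prod.scope = r_prod.scope = root.scope = [0, 1, 2]
assign_ids(root)

# joint log-density of the mixture at x = (0.5, 1.0, 2.0)
print(log_likelihood(root, np.array([[0.5, 1.0, 2.0]])))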
Example #4
import itertools

import numpy as np

# import paths assume SPFlow's package layout
from spn.algorithms.Validity import is_valid
from spn.structure.Base import Product, Sum, assign_ids
from spn.structure.leaves.parametric.Parametric import Gaussian


def Make_SPN_from_RegionGraph(rg_layers, rgn, num_classes, num_gauss, num_sums, default_mean=0.0, default_stdev=1.0):
    """Build a RAT-SPN-style structure from a layered region graph.

    Even layers of `rg_layers` hold regions, odd layers hold partitions of
    two regions each; `rgn` is a numpy RandomState used to draw the leaf
    means and the Dirichlet sum weights (`default_mean` is currently unused).
    """
    def add_to_map(given_map, key, item):
        # append `item` to the list stored under `key`
        existing_items = given_map.get(key, [])
        given_map[key] = existing_items + [item]

    region_distributions = {}
    region_products = {}
    vector_list = [[]]
    # layer 0: per leaf region, num_gauss fully factorized Gaussian products
    for leaf_region in rg_layers[0]:
        gauss_vector = []
        for _ in range(num_gauss):
            prod = Product()
            prod.scope.extend(leaf_region)
            for r in leaf_region:
                prod.children.append(Gaussian(mean=rgn.randn(1)[0], stdev=default_stdev, scope=[r]))
                # prod.children.append(Gaussian(mean=0, stdev=default_stdev, scope=[r]))

            assert len(prod.children) > 0
            gauss_vector.append(prod)

        vector_list[-1].append(gauss_vector)
        region_distributions[leaf_region] = gauss_vector

    for layer_idx in range(1, len(rg_layers)):
        vector_list.append([])
        if layer_idx % 2 == 1:
            # odd layer: every partition combines two region vectors into a
            # cross product of Product nodes
            partitions = rg_layers[layer_idx]
            for i, partition in enumerate(partitions):
                input_regions = list(partition)
                input1 = region_distributions[input_regions[0]]
                input2 = region_distributions[input_regions[1]]

                prod_vector = []
                for c1 in input1:
                    for c2 in input2:
                        prod = Product()
                        prod.children.append(c1)
                        prod.children.append(c2)
                        prod.scope.extend(c1.scope)
                        prod.scope.extend(c2.scope)
                        prod_vector.append(prod)

                        assert len(prod.children) > 0

                vector_list[-1].append(prod_vector)

                resulting_region = frozenset(input_regions[0] | input_regions[1])
                add_to_map(region_products, resulting_region, prod_vector)
        else:
            # even layer: mix the incoming products with Sum nodes; the top
            # layer gets one sum per class
            cur_num_sums = num_classes if layer_idx == len(rg_layers) - 1 else num_sums

            regions = rg_layers[layer_idx]
            for i, region in enumerate(regions):
                product_vectors = list(itertools.chain.from_iterable(region_products[region]))

                sum_vector = []

                for _ in range(cur_num_sums):
                    sum_node = Sum()
                    sum_node.scope.extend(region)
                    sum_node.children.extend(product_vectors)
                    sum_vector.append(sum_node)
                    sum_node.weights.extend(rgn.dirichlet([1] * len(sum_node.children), 1)[0].tolist())
                    # w = np.array([1] * len(sum_node.children))
                    # w = w / np.sum(w)
                    # sum_node.weights.extend(w.tolist())

                    assert len(sum_node.children) > 0

                vector_list[-1].append(sum_vector)

                region_distributions[region] = sum_vector

    # uniform root mixture over the top layer's class sums
    tmp_root = Sum()
    tmp_root.children.extend(vector_list[-1][0])
    tmp_root.scope.extend(tmp_root.children[0].scope)
    tmp_root.weights = [1 / len(tmp_root.children)] * len(tmp_root.children)
    assign_ids(tmp_root)

    v, err = is_valid(tmp_root)
    assert v, err
    return vector_list, tmp_root
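A toy driver (hypothetical; the rg_layers layout is inferred from the function body: even layers are regions, odd layers are partitions of two regions):

rg_layers = [
    [frozenset({0}), frozenset({1})],               # leaf regions
    [frozenset({frozenset({0}), frozenset({1})})],  # one partition
    [frozenset({0, 1})],                            # root region
]
rgn = np.random.RandomState(17)
vectors, root = Make_SPN_from_RegionGraph(rg_layers, rgn, num_classes=2,
                                          num_gauss=3, num_sums=2)
print(len(root.children))  # one Sum node per class -> 2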