Example #1
    def test_optimization(self):
        np.random.seed(17)
        d1 = np.random.normal(10, 5, size=2000).tolist()
        d2 = np.random.normal(30, 5, size=2000).tolist()
        data = d1 + d2
        data = np.array(data).reshape((-1, 10))
        data = data.astype(np.float32)

        ds_context = Context(meta_types=[MetaType.REAL] * data.shape[1],
                             parametric_types=[Gaussian] * data.shape[1])

        spn = learn_parametric(data, ds_context)

        spn.weights = [0.8, 0.2]
        spn.children[0].children[0].mean = 3.0

        py_ll = np.sum(log_likelihood(spn, data))

        print(spn.weights, spn.children[0].children[0].mean)

        EM_optimization(spn, data, iterations=10)

        print(spn.weights, spn.children[0].children[0].mean)

        py_ll_opt = np.sum(log_likelihood(spn, data))

        self.assertLessEqual(py_ll, py_ll_opt)
        self.assertAlmostEqual(spn.weights[0], 0.5, 6)
        self.assertAlmostEqual(spn.weights[1], 0.5, 6)
        self.assertAlmostEqual(spn.children[0].children[0].mean, 10.50531, 4)
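
Examples #1 and #2 are complete test methods, but the listing omits their module preamble; both belong to a unittest.TestCase subclass that is not shown. A minimal import sketch that would make them runnable, assuming SPFlow's usual package layout (the module paths below are assumptions, not part of the snippets):

import numpy as np

# Assumed SPFlow module paths -- not shown in the original snippets.
from spn.structure.Base import Context
from spn.structure.StatisticalTypes import MetaType
from spn.structure.leaves.parametric.Parametric import Gaussian
from spn.algorithms.LearningWrappers import learn_parametric
from spn.algorithms.Inference import log_likelihood
from spn.algorithms.EM import EM_optimization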
Example #2
    def test_optimization(self):
        np.random.seed(17)
        d1 = np.random.normal(10, 0.01, size=2000).tolist()
        d2 = np.random.normal(30, 10, size=2000).tolist()
        data = d1 + d2
        data = np.array(data).reshape((-1, 10))
        data = data.astype(np.float32)

        ds_context = Context(meta_types=[MetaType.REAL] * data.shape[1],
                             parametric_types=[Gaussian] * data.shape[1])

        spn = learn_parametric(data, ds_context)

        spn.weights = [0.8, 0.2]

        py_ll = log_likelihood(spn, data)

        print(spn.weights)

        EM_optimization(spn, data)

        print(spn.weights)

        py_ll_opt = log_likelihood(spn, data)
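
Example #2 evaluates the log-likelihood before and after EM but never compares the two results; a hedged follow-up in the style of Example #1 (a hypothetical addition, not present in the source test) could close that loop:

        # Hypothetical check: EM should not decrease the total log-likelihood.
        self.assertLessEqual(np.sum(py_ll), np.sum(py_ll_opt))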
Example #3
    def test_clustering(self):
        np.random.seed(0)

        centers = [[10, 10], [-10, -10], [10, -10]]
        center_stdev = 0.7
        X, labels_true = make_blobs(n_samples=1000000,
                                    centers=centers,
                                    cluster_std=center_stdev)

        initial_cluster_centers = [[1, 1], [0, 0], [1, 0]]
        g0x = Gaussian(mean=initial_cluster_centers[0][0], stdev=1.0, scope=0)
        g0y = Gaussian(mean=initial_cluster_centers[0][1], stdev=1.0, scope=1)
        g1x = Gaussian(mean=initial_cluster_centers[1][0], stdev=1.0, scope=0)
        g1y = Gaussian(mean=initial_cluster_centers[1][1], stdev=1.0, scope=1)
        g2x = Gaussian(mean=initial_cluster_centers[2][0], stdev=1.0, scope=0)
        g2y = Gaussian(mean=initial_cluster_centers[2][1], stdev=1.0, scope=1)

        spn = 0.6 * (0.5 * (g0x * g0y) + 0.5 * (g1x * g1y)) + 0.4 * (g2x * g2y)

        EM_optimization(spn, X, iterations=5)

        cluster_centers2 = [[g0x.mean, g0y.mean], [g1x.mean, g1y.mean],
                            [g2x.mean, g2y.mean]]

        print("\ntrue centers", centers)
        print("initial ctrs", initial_cluster_centers)
        print("final   ctrs", cluster_centers2)

        for i, cluster_location in enumerate(centers):
            self.assertAlmostEqual(cluster_location[0], cluster_centers2[i][0],
                                   2)
            self.assertAlmostEqual(cluster_location[1], cluster_centers2[i][1],
                                   2)

        for n in get_nodes_by_type(spn, Gaussian):
            self.assertAlmostEqual(n.stdev, center_stdev, 2)
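
Example #3 builds the mixture by hand with SPFlow's arithmetic shorthand, where 0.6 * (...) + 0.4 * (...) creates a sum node over product nodes, and it needs two names beyond the imports listed above; the module paths are again assumptions:

from sklearn.datasets import make_blobs           # synthetic 2-D blob data
from spn.structure.Base import get_nodes_by_type  # collect the Gaussian leaves

Example #4 additionally assumes import matplotlib.pyplot as plt and refers to spn, data, s0, s1, and py_ll, which the excerpt itself does not define.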
Example #4
# py_ll = np.mean(log_likelihood(spn, data))
# print(f'{py_ll:.8f},[{s0.weights[0]:.4f},{s0.weights[1]:.4f}], [{s1.weights[0]:.4f},{s1.weights[1]:.4f}]')

## set different starting point
print("Setting weights to diff starting point.")
s0.weights[0] = s1.weights[0] = 0.1
s0.weights[1] = s1.weights[1] = 0.9

print(f'{"Eval of artifical data":60}', end='')
lls = []
weights = []
for i in range(100):
    ll = np.mean(log_likelihood(spn, data))
    lls.append(ll)
    weights.append(s0.weights[0])
    EM_optimization(spn, data, iterations=1)

fig, axs = plt.subplots(2, 1)
fig.suptitle("Starting weights [0.1, 0.9]")

axs[0].plot(range(100), lls, label="LL")
axs[0].legend()
axs[0].set_ylabel("LL")
axs[0].set_xlabel("iteration")
max_y = max(lls)

max_x = lls.index(max_y)
axs[0].annotate(f'Max @ {max_y:.4f}',
                xy=(max_x, max_y),
                arrowprops=dict(facecolor='black', shrink=0.01))
axs[0].annotate(f"{py_ll:.4f} @ weights [.34,.66]",