def test_optimization(self):
    """EM on a perturbed two-component Gaussian mixture restores the optimum.

    Learns an SPN from two well-separated Gaussian samples, deliberately skews
    the mixture weights and one component mean, then checks that 10 EM
    iterations (a) do not decrease the total log-likelihood and (b) drive the
    weights back to 0.5/0.5 and the perturbed mean back near its fitted value.
    """
    np.random.seed(17)  # fixed seed so the expected mean (10.50531) is reproducible

    # Two clearly separated modes, interleaved into rows of 10 features.
    sample_low = np.random.normal(10, 5, size=2000).tolist()
    sample_high = np.random.normal(30, 5, size=2000).tolist()
    data = np.array(sample_low + sample_high).reshape((-1, 10)).astype(np.float32)

    n_features = data.shape[1]
    ds_context = Context(
        meta_types=[MetaType.REAL] * n_features,
        parametric_types=[Gaussian] * n_features,
    )
    spn = learn_parametric(data, ds_context)

    # Knock the learned model away from its optimum.
    spn.weights = [0.8, 0.2]
    spn.children[0].children[0].mean = 3.0

    ll_before = np.sum(log_likelihood(spn, data))
    print(spn.weights, spn.children[0].children[0].mean)

    EM_optimization(spn, data, iterations=10)

    print(spn.weights, spn.children[0].children[0].mean)
    ll_after = np.sum(log_likelihood(spn, data))

    # EM must not make the fit worse, and should recover the original parameters.
    self.assertLessEqual(ll_before, ll_after)
    self.assertAlmostEqual(spn.weights[0], 0.5, 6)
    self.assertAlmostEqual(spn.weights[1], 0.5, 6)
    self.assertAlmostEqual(spn.children[0].children[0].mean, 10.50531, 4)
def test_optimization(self):
    """Run EM from skewed starting weights and report the likelihood change.

    Builds data from one very narrow and one very wide Gaussian, learns an
    SPN, biases the root weights to [0.8, 0.2], then runs EM and prints the
    weights before and after.
    """
    # NOTE(review): this variant computes ll_before/ll_after but asserts
    # nothing — presumably the assertions live beyond this chunk; confirm.
    np.random.seed(17)

    narrow = np.random.normal(10, 0.01, size=2000).tolist()
    wide = np.random.normal(30, 10, size=2000).tolist()
    data = np.array(narrow + wide).reshape((-1, 10)).astype(np.float32)

    n_features = data.shape[1]
    ds_context = Context(
        meta_types=[MetaType.REAL] * n_features,
        parametric_types=[Gaussian] * n_features,
    )
    spn = learn_parametric(data, ds_context)

    # Start EM from deliberately unbalanced mixture weights.
    spn.weights = [0.8, 0.2]

    ll_before = log_likelihood(spn, data)
    print(spn.weights)
    EM_optimization(spn, data)
    print(spn.weights)
    ll_after = log_likelihood(spn, data)
def test_clustering(self):
    """EM recovers three blob centers and their common stdev.

    Samples 1M points from three isotropic blobs, builds a hand-wired SPN of
    per-dimension Gaussians starting far from the true centers, runs 5 EM
    iterations, and checks each fitted (x, y) mean against the true center
    and each fitted stdev against the generating stdev (2 decimal places).
    """
    np.random.seed(0)
    centers = [[10, 10], [-10, -10], [10, -10]]
    center_stdev = 0.7
    X, _labels = make_blobs(n_samples=1000000, centers=centers, cluster_std=center_stdev)

    initial_cluster_centers = [[1, 1], [0, 0], [1, 0]]

    # One (x-scope, y-scope) Gaussian pair per cluster, all with unit stdev.
    pairs = [
        (Gaussian(mean=cx, stdev=1.0, scope=0), Gaussian(mean=cy, stdev=1.0, scope=1))
        for cx, cy in initial_cluster_centers
    ]
    (g0x, g0y), (g1x, g1y), (g2x, g2y) = pairs

    # Mixture of three product nodes; unequal top weights are intentional.
    spn = 0.6 * (0.5 * (g0x * g0y) + 0.5 * (g1x * g1y)) + 0.4 * (g2x * g2y)

    EM_optimization(spn, X, iterations=5)

    fitted_centers = [[gx.mean, gy.mean] for gx, gy in pairs]
    print("\ntrue centers", centers)
    print("initial ctrs", initial_cluster_centers)
    print("final ctrs", fitted_centers)

    for i, (true_x, true_y) in enumerate(centers):
        self.assertAlmostEqual(true_x, fitted_centers[i][0], 2)
        self.assertAlmostEqual(true_y, fitted_centers[i][1], 2)

    for node in get_nodes_by_type(spn, Gaussian):
        self.assertAlmostEqual(node.stdev, center_stdev, 2)
# py_ll = np.mean(log_likelihood(spn, data)) # print(f'{py_ll:.8f},[{s0.weights[0]:.4f},{s0.weights[1]:.4f}], [{s1.weights[0]:.4f},{s1.weights[1]:.4f}]') ## set different starting point print("Setting weights to diff starting point.") s0.weights[0] = s1.weights[0] = 0.1 s0.weights[1] = s1.weights[1] = 0.9 print(f'{"Eval of artifical data":60}', end='') lls = [] weights = [] for i in range(100): ll = np.mean(log_likelihood(spn, data)) lls.append(ll) weights.append(s0.weights[0]) EM_optimization(spn, data, iterations=1) fig, axs = plt.subplots(2, 1) fig.suptitle("Starting weights [0.1, 0.9]") axs[0].plot(range(100), lls, label="LL") axs[0].legend() axs[0].set_ylabel("LL") axs[0].set_xlabel("iteration") max_y = max(lls) max_x = lls.index(max_y) axs[0].annotate(f'Max @ {max_y:.4f}', xy=(max_x, max_y), arrowprops=dict(facecolor='black', shrink=0.01)) axs[0].annotate(f"{py_ll:.4f} @ weights [.34,.66]",