def main():
    """Plot observed, true and imputed Gaussian data as Hinton diagrams.

    Draws ``n_data`` samples from a ``data_dim``-dimensional Gaussian with a
    random mean and covariance, masks entries with NaN, and calls
    ``gauss.gauss_impute`` with the generating ``mu`` and ``sigma``. Three
    figures (observed, hidden truth, imputation) are saved as PDFs through
    ``pml.savefig`` and then displayed with ``plt.show()``.
    """
    np.random.seed(12)
    data_dim = 8
    n_data = 10
    threshold_missing = 0.5

    mu = np.random.randn(data_dim, 1)
    # make_spd_matrix yields a random symmetric positive-definite matrix,
    # which is used here as the covariance.
    sigma = make_spd_matrix(n_dim=data_dim)
    x_full = gauss.gauss_sample(mu, sigma, n_data)

    # An entry is hidden when its uniform draw falls below the threshold.
    missing = np.random.rand(n_data, data_dim) < threshold_missing
    x_miss = np.copy(x_full)
    x_miss[missing] = np.nan

    x_imputed = gauss.gauss_impute(mu, sigma, x_miss)

    # The "observed" diagram is drawn from a copy with every NaN set to 0;
    # a boolean mask does this in one step instead of a per-element loop.
    x_observed = np.copy(x_miss)
    x_observed[np.isnan(x_miss)] = 0

    # (figure number, matrix, title, output file) in the original plot order.
    panels = (
        (1, x_observed, 'Observed', "gauss_impute_observed.pdf"),
        (2, x_full, 'Hidden truth', "gauss_impute_truth.pdf"),
        (3, x_imputed, 'Imputation with true params', "gauss_impute_pred.pdf"),
    )
    for number, matrix, title, filename in panels:
        fig = plt.figure(number)
        pml.hinton_diagram(matrix, ax=fig.gca())
        fig.suptitle(title)
        # Saved right after creation, while this figure is the current one.
        pml.savefig(filename, dpi=300)

    plt.show()
# Script fragment: impute masked Gaussian data twice, once with the generating
# parameters and once with the parameters returned by gauss.gauss_fit_em.
from sklearn.datasets import make_spd_matrix
# NOTE(review): LinearRegression is not used in the visible part of this file.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

np.random.seed(4)
data_dim = 4
n_data = 100
threshold_missing = 0.5

mu = np.random.randn(data_dim, 1)
sigma = make_spd_matrix(n_dim=data_dim)
x_full = gauss.gauss_sample(mu, sigma, n_data)

# An entry is hidden (set to NaN) when its uniform draw is below the threshold.
missing = np.random.rand(n_data, data_dim) < threshold_missing
x_miss = np.copy(x_full)
x_miss[missing] = np.nan

# "Oracle" imputation: uses the mu and sigma the data was sampled from.
x_impute_oracle = gauss.gauss_impute(mu, sigma, x_miss)

# EM imputation: uses the 'mu' and 'Sigma' entries of the gauss_fit_em result.
result = gauss.gauss_fit_em(x_miss)
m = result.get('mu')
sig = result.get('Sigma')
x_impute_em = gauss.gauss_impute(m, sig, x_miss)


# NOTE(review): this definition is cut off in the visible excerpt; only the
# part shown here is documented.
def plot_performance(x_miss, x_full, x_impute):
    # One R^2 score per column, computed over the rows where x_miss is NaN.
    r_squared = []
    # NOTE(review): 4 is hard-coded; it equals data_dim in the script above.
    for i in range(4):
        # Row indices of column i that are NaN in x_miss.
        miss = np.argwhere(np.isnan(x_miss[:, i]))
        r2 = r2_score(x_full[miss, i], x_impute[miss, i])
        r_squared.append(r2)
    fig, axs = plt.subplots(nrows=2, ncols=2)
import pyprobml_utils as pml

# Script fragment: sample Gaussian data, hide entries at random, call
# gauss.gauss_impute with the generating parameters, and save Hinton diagrams
# of the observed and the true data.
np.random.seed(12)
data_dim = 8
n_data = 10
threshold_missing = 0.5

mu = np.random.randn(data_dim, 1)
# make_spd_matrix yields a random symmetric positive-definite matrix,
# which is used here as the covariance.
sigma = make_spd_matrix(n_dim=data_dim)
x_full = gauss.gauss_sample(mu, sigma, n_data)

# An entry is hidden (set to NaN) when its uniform draw is below the threshold.
missing = np.random.rand(n_data, data_dim) < threshold_missing
x_miss = np.copy(x_full)
x_miss[missing] = np.nan

x_imputed = gauss.gauss_impute(mu, sigma, x_miss)

# The "observed" diagram is drawn from a copy of x_miss with every NaN set
# to 0; a boolean mask does this in one step instead of a per-element loop.
xmiss0 = np.copy(x_miss)
xmiss0[np.isnan(x_miss)] = 0

plot_1 = plt.figure(1)
pml.hinton_diagram(xmiss0, ax=plot_1.gca())
plot_1.suptitle('Observed')
pml.savefig("gauss_impute_observed.pdf", dpi=300)

plot_2 = plt.figure(2)
pml.hinton_diagram(x_full, ax=plot_2.gca())
plot_2.suptitle('Hidden truth')
pml.savefig("gauss_impute_truth.pdf", dpi=300)