import numpy as np

import utils


def plot_approximated_function(regr, x_range, y_range, filename, title=None):
    # Evaluate the regressor on a dense 2-D grid and render the surface.
    x_grid, y_grid = np.meshgrid(x_range, y_range)
    input_data = []
    for x1, x2 in zip(np.ravel(x_grid), np.ravel(y_grid)):
        input_data.append([x1, x2])
    input_data = np.array(input_data)
    z_value = np.array(regr(input_data))
    z_grid = np.reshape(z_value, (x_grid.shape[0], x_grid.shape[1]))
    utils.plot_3d(x_grid, y_grid, z_grid, filename, title)
def plot_approximated_function(regr, session, x_range, y_range, filename, title=None):
    x_grid, y_grid = np.meshgrid(x_range, y_range)
    input_data = []
    for x1, x2 in zip(np.ravel(x_grid), np.ravel(y_grid)):
        input_data.append([x1, x2])
    input_data = np.array(input_data)
    z_value = np.array(regr.predict(session, input_data))
    z_grid = np.reshape(z_value, (x_grid.shape[0], x_grid.shape[1]))
    utils.plot_3d(x_grid, y_grid, z_grid, "../images/" + filename.replace(".", ""), title)
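# Both helpers above delegate to utils.plot_3d, which is not shown in this
# section. The following is a minimal sketch of what it is assumed to do for
# the grid-based call pattern used here (a matplotlib surface plot; the name
# and signature are inferred from the call sites, not from the real helper):
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3d projection)


def plot_3d(x_grid, y_grid, z_grid, filename=None, title=None):
    # Hypothetical stand-in for utils.plot_3d: draw z over the (x, y) grid.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(np.asarray(x_grid), np.asarray(y_grid), np.asarray(z_grid))
    if title:
        ax.set_title(title)
    if filename:
        plt.savefig(filename)
    else:
        plt.show()
    plt.close(fig)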
import itertools

import numpy as np
import tqdm

import utils

# LOGIT_FNAME, LOGIT_CACHE and logit_norm_moments are defined elsewhere
# in this module.


def precalc_logit_moments(mus, sigmas):
    # Compute (mean, variance) of the logit-normal for every (mu, sigma)
    # pair, reusing values already cached on disk.
    keys = list(itertools.product(mus, sigmas))
    old = utils.get_values(keys, LOGIT_FNAME)
    for mu, sigma in tqdm.tqdm(keys):
        if (mu, sigma) not in old:
            avg, var = logit_norm_moments(mu, sigma)
            old[(mu, sigma)] = (avg, var)
    utils.dump_values(old, LOGIT_FNAME)
    LOGIT_CACHE.update(old)
    return old


if __name__ == '__main__':
    mus = np.arange(-6, 6, 0.2)
    sigmas = np.sort(np.arange(0.01, 7., 0.03))
    precalc_logit_moments(mus, sigmas)
    x, y, z1, z2 = [], [], [], []
    for mu, sigma in tqdm.tqdm(list(itertools.product(mus, sigmas))):
        avg, var = logit_norm_moments(mu, sigma)
        x.append(mu)
        y.append(sigma)
        z1.append(avg)
        z2.append(var)
    utils.plot_3d(x, y, z1, None)
    utils.plot_3d(x, y, z2, None)
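# The moments of a logit-normal distribution have no closed form, which is
# presumably why they are precomputed and cached above. A plausible
# implementation of logit_norm_moments (an assumption -- the original is not
# shown) integrates the logistic transform against the normal density with
# Gauss-Hermite quadrature:
from scipy.special import expit


def logit_norm_moments(mu, sigma, n_points=200):
    # E[f(Z)] for Z ~ N(mu, sigma^2) via Gauss-Hermite quadrature:
    # substitute z = mu + sqrt(2)*sigma*t so the weights absorb the density.
    nodes, weights = np.polynomial.hermite.hermgauss(n_points)
    z = mu + np.sqrt(2.0) * sigma * nodes
    s = expit(z)                       # sigmoid of the quadrature nodes
    w = weights / np.sqrt(np.pi)       # normalized quadrature weights
    avg = np.sum(w * s)
    var = np.sum(w * s**2) - avg**2
    return avg, var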
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# get_data, DeepGMM, deep_iv_fit and plot_3d come from the project's own
# modules and are not shown in this section.


def test(test_id, dir, strength_scale, n_samples, num_features,
         num_instruments, num_treatments, num_outcomes):

    def tau_fn(x, p):
        # True structural function. Alternatives tried during development
        # included np.abs(x), np.sin(x), 2 / (1 + np.exp(-2 * x)), step
        # functions such as 1. * (x < 0) + 2.5 * (x >= 0), and cubic
        # variants like -1.5 * x + .9 * (x**2) + x**3.
        return (-1.5 * x + .9 * (x**2)) * p

    iv_strength = strength_scale * np.random.uniform(
        1., 1.1, size=(num_instruments, 1))
    degree_benchmarks = 3

    # Network parameters
    hidden_layers = [1000, 1000, 1000]

    # Generate data
    data_x, data_z, data_treatment, data_y = get_data(
        n_samples, num_instruments, iv_strength, tau_fn, num_features)
    data_z = np.concatenate((data_z, data_x), axis=1)
    data_p = np.concatenate((data_treatment, data_x), axis=1)
    num_instruments = num_features + num_instruments
    num_treatments = num_features + num_treatments
    print(data_p.shape)
    print(data_z.shape)
    print(data_y.shape)

    if num_instruments >= 2:
        plt.figure()
        plt.subplot(1, 4, 1)
        plt.scatter(data_z[:, 0], data_p[:, 0], label='p vs z1')
        plt.legend()
        plt.subplot(1, 4, 2)
        plt.scatter(data_z[:, 1], data_p[:, 0], label='p vs z2')
        plt.legend()
        plt.subplot(1, 4, 3)
        plt.scatter(data_p[:, 0], data_y)
        plt.legend()
        plt.subplot(1, 4, 4)
        plt.scatter(data_p[:, 1], data_y)
        plt.legend()
        plt.savefig(os.path.join(dir, 'data_{}.png'.format(test_id)))

    # We reset the whole graph
    dgmm = DeepGMM(
        n_critics=70,
        num_steps=200,
        store_step=5,
        learning_rate_modeler=0.01,
        learning_rate_critics=0.01,
        critics_jitter=True,
        dissimilarity_eta=0.0,
        cluster_type='kmeans',
        critic_type='Gaussian',
        critics_precision=None,
        min_cluster_size=200,  # num_trees=5,
        eta_hedge=0.16,
        bootstrap_hedge=False,
        l1_reg_weight_modeler=0.0,
        l2_reg_weight_modeler=0.0,
        dnn_layers=hidden_layers,
        dnn_poly_degree=1,
        log_summary=False,
        summary_dir='./graphs_monte')
    dgmm.fit(data_z, data_p, data_y)

    test_min = np.percentile(data_p, 10)
    test_max = np.percentile(data_p, 90)
    test_grid = np.array(
        list(
            itertools.product(
                np.linspace(test_min, test_max, 100), repeat=num_treatments)))
    print(test_grid.shape)

    test_data_x, _, test_data_treatment, _ = get_data(
        5 * n_samples, num_instruments, iv_strength, tau_fn, num_features)
    test_data_p = np.concatenate((test_data_treatment, test_data_x), axis=1)
    print(test_data_p.shape)
    clip_edges = (np.all((test_data_p > test_min), axis=1)
                  & np.all((test_data_p < test_max), axis=1)).flatten()
    test_data_p = test_data_p[clip_edges, :]
    test_data_treatment = test_data_treatment[clip_edges, :]
    test_data_x = test_data_x[clip_edges, :]
    print(test_data_p.shape)

    best_fn_grid = dgmm.predict(test_grid, model='best')
    final_fn_grid = dgmm.predict(test_grid, model='final')
    avg_fn_grid = dgmm.predict(test_grid, model='avg')
    best_fn_dist = dgmm.predict(test_data_p, model='best')
    final_fn_dist = dgmm.predict(test_data_p, model='final')
    avg_fn_dist = dgmm.predict(test_data_p, model='avg')

    ##################################
    # Benchmarks
    ##################################
    from sklearn.linear_model import LinearRegression, MultiTaskElasticNet, ElasticNet
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.pipeline import Pipeline
    from sklearn.neural_network import MLPRegressor

    # Direct polynomial regression of y on p.
    direct_poly = Pipeline([('poly',
                             PolynomialFeatures(degree=degree_benchmarks)),
                            ('linear', LinearRegression())])
    direct_poly.fit(data_p, data_y.flatten())
    direct_poly_fn_grid = direct_poly.predict(test_grid)
    direct_poly_fn_dist = direct_poly.predict(test_data_p)

    # Direct neural-network regression of y on p.
    direct_nn = MLPRegressor(hidden_layer_sizes=hidden_layers)
    direct_nn.fit(data_p, data_y.flatten())
    direct_nn_fn_grid = direct_nn.predict(test_grid)
    direct_nn_fn_dist = direct_nn.predict(test_data_p)

    # Two-stage least squares with polynomial features.
    plf = PolynomialFeatures(degree=degree_benchmarks)
    sls_poly_first = MultiTaskElasticNet()
    sls_poly_first.fit(plf.fit_transform(data_z), plf.fit_transform(data_p))
    sls_poly_second = ElasticNet()
    sls_poly_second.fit(
        sls_poly_first.predict(plf.fit_transform(data_z)), data_y)
    sls_poly_fn_grid = sls_poly_second.predict(plf.fit_transform(test_grid))
    sls_poly_fn_dist = sls_poly_second.predict(plf.fit_transform(test_data_p))

    # Plain two-stage least squares.
    sls_first = LinearRegression()
    sls_first.fit(data_z, data_p)
    sls_second = LinearRegression()
    sls_second.fit(sls_first.predict(data_z), data_y)
    sls_fn_grid = sls_second.predict(test_grid)
    sls_fn_dist = sls_second.predict(test_data_p)

    ######
    # Deep IV
    ######
    # We reset the whole graph
    with tf.name_scope("DeepIV"):
        deep_iv = deep_iv_fit(data_x, data_z, data_treatment, data_y,
                              epochs=10, hidden=hidden_layers)
    deep_iv_fn_grid = deep_iv.predict([test_grid[:, 1], test_grid[:, 0]])
    deep_iv_fn_dist = deep_iv.predict([test_data_x, test_data_treatment])

    plt.figure()
    plot_3d(test_grid, tau_fn(test_grid[:, [1]], test_grid[:, [0]]).flatten())
    plt.savefig(os.path.join(dir, 'true_{}.png'.format(test_id)))
    print(avg_fn_grid.shape)
    plt.figure()
    plot_3d(test_grid, avg_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'avg_fn_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, best_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'best_fn_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, final_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'final_fn_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, deep_iv_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'deep_iv_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, sls_poly_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'sls_poly_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, sls_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'sls_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, direct_poly_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'direct_poly_{}.png'.format(test_id)))
    plt.figure()
    plot_3d(test_grid, direct_nn_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'direct_nn_{}.png'.format(test_id)))

    def mse_test(y_true, y_pred):
        # R^2-style score: 1 - MSE / Var(y_true), so higher is better.
        return 1 - np.mean(
            (y_pred.flatten() - y_true.flatten())**2) / np.var(
                y_true.flatten())

    # Scores on the observed treatment distribution.
    mse_best = mse_test(tau_fn(test_data_x, test_data_treatment), best_fn_dist)
    mse_final = mse_test(tau_fn(test_data_x, test_data_treatment), final_fn_dist)
    mse_avg = mse_test(tau_fn(test_data_x, test_data_treatment), avg_fn_dist)
    mse_2sls_poly = mse_test(tau_fn(test_data_x, test_data_treatment), sls_poly_fn_dist)
    mse_direct_poly = mse_test(tau_fn(test_data_x, test_data_treatment), direct_poly_fn_dist)
    mse_direct_nn = mse_test(tau_fn(test_data_x, test_data_treatment), direct_nn_fn_dist)
    mse_2sls = mse_test(tau_fn(test_data_x, test_data_treatment), sls_fn_dist)
    mse_deep_iv = mse_test(tau_fn(test_data_x, test_data_treatment), deep_iv_fn_dist)
    on_p_dist = [
        mse_best, mse_final, mse_avg, mse_deep_iv, mse_2sls_poly, mse_2sls,
        mse_direct_poly, mse_direct_nn
    ]

    # Scores on the uniform evaluation grid.
    mse_best = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), best_fn_grid)
    mse_final = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), final_fn_grid)
    mse_avg = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), avg_fn_grid)
    mse_2sls_poly = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), sls_poly_fn_grid)
    mse_direct_poly = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), direct_poly_fn_grid)
    mse_direct_nn = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), direct_nn_fn_grid)
    mse_2sls = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), sls_fn_grid)
    mse_deep_iv = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]), deep_iv_fn_grid)
    on_p_grid = [
        mse_best, mse_final, mse_avg, mse_deep_iv, mse_2sls_poly, mse_2sls,
        mse_direct_poly, mse_direct_nn
    ]

    return on_p_dist, on_p_grid
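# A minimal driver sketch for the experiment above. The looped range, output
# directory and all parameter values below are illustrative assumptions, not
# part of the original:
if __name__ == '__main__':
    os.makedirs('./results', exist_ok=True)  # hypothetical output directory
    results_dist, results_grid = [], []
    for test_id in range(10):  # hypothetical number of Monte Carlo runs
        on_p_dist, on_p_grid = test(
            test_id, dir='./results', strength_scale=0.5, n_samples=4000,
            num_features=1, num_instruments=1, num_treatments=1,
            num_outcomes=1)
        results_dist.append(on_p_dist)
        results_grid.append(on_p_grid)
    # Average the R^2-style scores per method across runs.
    print(np.mean(results_dist, axis=0))
    print(np.mean(results_grid, axis=0))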
pwd_stacked = '/home/ov/data/stacked/'
pwd_stacked_out = (pwd_stacked + 'nfpat' + str(patient_nr) + scid +
                   '.stacked_icp_abp.npy')

# =============================================================================
# Load waves:
stacked_waves = np.load(pwd_stacked_out)
# Alternative loading path kept for reference:
# stacked_waves = load_waves_by_pat_scid(patids, scid, pwd)
waves = utils.acces_waves_by_type(None, stacked_waves, wave_type)
print('Stacked size: {}'.format(stacked_waves.shape))
print('Unstacked {} waves: {}'.format(wave_type, waves.shape))

# =============================================================================
# Fit-Transforming PCA on acquired waves:
fitted = utils.pca_projections(waves, 3, svd_solver='arpack')
print('Fitted PCA: {}'.format(fitted.shape))

xs = fitted[:, 0]
ys = fitted[:, 1]
zs = fitted[:, 2]
utils.plot_3d(xs, ys, zs,
              title='PCA projection of {} waves of PAT{} ({})'.format(
                  wave_type, patient_nr, scid),
              xlabel='PC1', ylabel='PC2', zlabel='PC3', s=1)

# =============================================================================
# Gaussian Mixture Models and, if needed, additional clustering
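# utils.pca_projections is not shown in this section. A minimal sketch,
# assuming it wraps scikit-learn's PCA and returns the transformed
# coordinates (name and signature taken from the call sites above):
from sklearn.decomposition import PCA


def pca_projections(waves, n_components, svd_solver='auto'):
    # Project each wave onto the first n_components principal components.
    pca = PCA(n_components=n_components, svd_solver=svd_solver)
    return pca.fit_transform(waves)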
len_accpt_points = math.floor(tot_lenTime / rq_time_mean)
remaining_days = days - day + 1
attr_divide = math.floor(len_points / len_accpt_points)
if attr_divide == 0:
    n_clusters = 1
elif math.floor(attr_divide / day) > 2:
    n_clusters = math.floor(attr_divide / day)
else:
    n_clusters = 2

X = np.array(df_city.loc[:, ['Id', 'iPlannerRate', 'Lat', 'Long']].values,
             dtype=float)
utils.plot_2d(X)
utils.plot_3d(X)
random_state = day
# (cluster_labels,
#  cluster_centers,
#  max_cluster,
#  first_cluster_members) = clustering.kmeans_clsuster(X, n_clusters,
#                                                      random_state)
# df_city = df_city.iloc[first_cluster_members, :]
df_city = df_city.iloc[:len_accpt_points, :]
df_city = df_city.sort_values(by='iPlannerRate', ascending=False)
if len(df_city) <= 6:
    break
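# The commented-out clustering.kmeans_clsuster helper is not defined in this
# section. A sketch of what it plausibly does, built on scikit-learn's
# KMeans; the four-value return contract is an assumption inferred from the
# unpacking at the call site:
from collections import Counter

import numpy as np
from sklearn.cluster import KMeans


def kmeans_clsuster(X, n_clusters, random_state):
    # Fit k-means, then report the largest cluster and its member indices.
    km = KMeans(n_clusters=n_clusters, random_state=random_state).fit(X)
    labels = km.labels_
    max_cluster = Counter(labels).most_common(1)[0][0]
    members = np.where(labels == max_cluster)[0]
    return labels, km.cluster_centers_, max_cluster, members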
concatenated_waves = normalize(concatenated_waves, norm='max')
print(concatenated_waves.shape)

# Fit-Transforming PCA on acquired waves:
fitted = utils.pca_projections(concatenated_waves, 3, svd_solver='arpack')
print(fitted.shape)
xs = fitted[:, 0]
ys = fitted[:, 1]
zs = fitted[:, 2]
utils.plot_3d(xs, ys, zs,
              title='NFPAT{} {} PCA projection'.format(patient_nr, scid),
              xlabel='PC1', ylabel='PC2', zlabel='PC3', s=1)

# Gaussian Mixture Models and, if needed, additional clustering
n_components = int(input('Type the number of components to split into: '))
cluster_means, clusters, ns, ks = utils.gaussian_mixture_pca_projections(
    fitted, concatenated_waves, n_components, patient_nr, scid,
    with_mahal=False)
concatenated_waves = normalize(concatenated_waves, norm='max')

# Fit-Transforming PCA on acquired waves:
fitted = utils.pca_projections(concatenated_waves, 3, svd_solver='arpack')
print(fitted.shape)
xs = fitted[:, 0]
ys = fitted[:, 1]
zs = fitted[:, 2]
scid = 'nsc'
utils.plot_3d(xs, ys, zs,
              title='PCA projection ({})'.format(scid),
              xlabel='PC1', ylabel='PC2', zlabel='PC3', s=1)

# Gaussian Mixture Models and, if needed, additional clustering
n_components = int(input('Type the number of components to split into: '))
cluster_means, clusters, ns, ks = utils.gaussian_mixture_pca_projections(
    fitted, concatenated_waves, n_components)
clusters = np.asarray(clusters)

# Cleaning (only if additional clustering was needed):
if ns is not None:
    print('\nCleaning...')
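# utils.gaussian_mixture_pca_projections is not shown here. A minimal
# sketch, assuming it fits a Gaussian mixture to the PCA projections and
# groups the original waves by predicted component; the (ns, ks) outputs of
# the real helper are not reconstructed and are returned as None:
from sklearn.mixture import GaussianMixture


def gaussian_mixture_pca_projections(fitted, waves, n_components):
    # Fit a GMM in PCA space and split the raw waves by mixture component.
    gmm = GaussianMixture(n_components=n_components).fit(fitted)
    labels = gmm.predict(fitted)
    clusters = [waves[labels == k] for k in range(n_components)]
    cluster_means = [c.mean(axis=0) for c in clusters]
    return cluster_means, clusters, None, None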
        frame_len=winlen, frame_step=winstep)
    frames = [window(winlen) * (f - mean) / std for f in frames]
    if not frames:
        continue
    # v2 is relu without bias, v3 is mf with bias
    f_rbm = grelurbm.get_features_v2(frames)
    feats_df = feats_df.append(
        {
            "phn": label,
            "category": row["category"],
            "collapsed": row["collapsed"],
            "feats": f_rbm,
            "algth": "rbm_relu",
            "idx": ix
        },
        ignore_index=True)

grelurbm.close_session()
del grelurbm

from PrincipalComponentAnalysis import PCA

feats = []
for ix, row in feats_df.iterrows():
    feats.extend(row["feats"])
pca = PCA().compute_pca(feats, 3)
plot_3d(pca, feats_df["collapsed"].values)
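# PrincipalComponentAnalysis is a project-local module that is not shown
# here. A minimal sketch of a compatible compute_pca, assuming it centers
# the stacked feature vectors and returns their n-dimensional projections
# (plain numpy SVD; the class shape is inferred from the call above):
import numpy as np


class PCA:
    def compute_pca(self, feats, n_components):
        # Center the features and project onto the top singular directions.
        X = np.asarray(feats, dtype=float)
        X = X - X.mean(axis=0)
        _, _, vt = np.linalg.svd(X, full_matrices=False)
        return X @ vt[:n_components].T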