import numpy as np
import utils


def plot_approximated_function(regr, x_range, y_range, filename, title=None):
    # Evaluate the regressor on a 2-D grid and plot the resulting surface.
    x_grid, y_grid = np.meshgrid(x_range, y_range)
    input_data = []
    for x1, x2 in zip(np.ravel(x_grid), np.ravel(y_grid)):
        input_data.append([x1, x2])
    input_data = np.array(input_data)
    z_value = np.array(regr(input_data))
    z_grid = np.reshape(z_value, (x_grid.shape[0], x_grid.shape[1]))
    utils.plot_3d(x_grid, y_grid, z_grid, filename, title)
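
# Minimal usage sketch (not part of the original snippet): any callable that
# maps an (N, 2) array of [x1, x2] points to N values can stand in for `regr`;
# the filename is a placeholder.
toy_regr = lambda pts: pts[:, 0] ** 2 + pts[:, 1] ** 2
plot_approximated_function(toy_regr,
                           np.linspace(-1.0, 1.0, 50),
                           np.linspace(-1.0, 1.0, 50),
                           'toy_surface.png',
                           title='x1^2 + x2^2')
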
def plot_approximated_function(regr,
                               session,
                               x_range,
                               y_range,
                               filename,
                               title=None):
    x_grid, y_grid = np.meshgrid(x_range, y_range)
    input_data = []
    for x1, x2 in zip(np.ravel(x_grid), np.ravel(y_grid)):
        input_data.append([x1, x2])
    input_data = np.array(input_data)
    z_value = np.array(regr.predict(session, input_data))
    z_grid = np.reshape(z_value, (x_grid.shape[0], x_grid.shape[1]))
    utils.plot_3d(x_grid, y_grid, z_grid,
                  "../images/" + filename.replace(".", ""), title)
Example no. 3

import itertools
import numpy as np
import utils
import tqdm

# LOGIT_FNAME (the on-disk cache file) and LOGIT_CACHE (the in-memory cache
# dict) are module-level names defined elsewhere in the original module and
# not shown in this snippet.


def precalc_logit_moments(mus, sigmas):
    keys = list(itertools.product(mus, sigmas))
    old = utils.get_values(keys, LOGIT_FNAME)
    for mu, sigma in tqdm.tqdm(keys):
        if (mu, sigma) not in old:
            avg, var = logit_norm_moments(mu, sigma)
            old[(mu, sigma)] = (avg, var)
    utils.dump_values(old, LOGIT_FNAME)
    LOGIT_CACHE.update(old)
    return old
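

# logit_norm_moments is called above but not shown in this snippet. A minimal
# sketch of what it plausibly computes (an assumption, not the original
# implementation): the mean and variance of sigmoid(Z) for Z ~ N(mu, sigma^2),
# estimated with Gauss-Hermite quadrature, since the logit-normal distribution
# has no closed-form moments.
def logit_norm_moments(mu, sigma, n_points=80):
    nodes, weights = np.polynomial.hermite.hermgauss(n_points)
    z = mu + np.sqrt(2.0) * sigma * nodes       # change of variables for N(mu, sigma^2)
    s = 1.0 / (1.0 + np.exp(-z))                # sigmoid evaluated at the quadrature nodes
    avg = np.sum(weights * s) / np.sqrt(np.pi)            # E[sigmoid(Z)]
    second = np.sum(weights * s ** 2) / np.sqrt(np.pi)    # E[sigmoid(Z)^2]
    return avg, second - avg ** 2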


if __name__ == '__main__':
    mus = np.arange(-6, 6, 0.2)
    sigmas = np.sort(np.arange(0.01, 7., 0.03))
    precalc_logit_moments(mus, sigmas)
    x, y, z1, z2 = [], [], [], []
    for mu, sigma in tqdm.tqdm(list(itertools.product(mus, sigmas))):
        avg, var = logit_norm_moments(mu, sigma)
        x.append(mu)
        y.append(sigma)
        z1.append(avg)
        z2.append(var)
    utils.plot_3d(x, y, z1, None)
    utils.plot_3d(x, y, z2, None)
Example no. 4
def test(test_id, dir, strength_scale, n_samples, num_features,
         num_instruments, num_treatments, num_outcomes):
    def tau_fn(x, p):
        # Alternative specifications left commented out in the original:
        # np.abs(p) * x, np.abs(x), np.sin(x), 2 / (1 + np.exp(-2 * x)),
        # 1. * (x < 0) + 2.5 * (x >= 0), 1. * (x < 0) + 3. * (x >= 0),
        # 1.5 * x - .9 * (x**2), -1.5 * x + .9 * (x**2) + x**3.
        return (-1.5 * x + .9 * (x**2)) * p

    iv_strength = strength_scale * np.random.uniform(
        1., 1.1, size=(num_instruments, 1))
    degree_benchmarks = 3

    # Network parameters
    hidden_layers = [1000, 1000, 1000]

    # Generate data
    data_x, data_z, data_treatment, data_y = get_data(n_samples,
                                                      num_instruments,
                                                      iv_strength, tau_fn,
                                                      num_features)
    data_z = np.concatenate((data_z, data_x), axis=1)
    data_p = np.concatenate((data_treatment, data_x), axis=1)
    num_instruments = num_features + num_instruments
    num_treatments = num_features + num_treatments
    print(data_p.shape)
    print(data_z.shape)
    print(data_y.shape)
    if num_instruments >= 2:
        plt.figure()
        plt.subplot(1, 4, 1)
        plt.scatter(data_z[:, 0], data_p[:, 0], label='p vs z1')
        plt.legend()
        plt.subplot(1, 4, 2)
        plt.scatter(data_z[:, 1], data_p[:, 0], label='p vs z2')
        plt.legend()
        plt.subplot(1, 4, 3)
        plt.scatter(data_p[:, 0], data_y)
        plt.legend()
        plt.subplot(1, 4, 4)
        plt.scatter(data_p[:, 1], data_y)
        plt.legend()
        plt.savefig(os.path.join(dir, 'data_{}.png'.format(test_id)))

    # We reset the whole graph
    dgmm = DeepGMM(
        n_critics=70,
        num_steps=200,
        store_step=5,
        learning_rate_modeler=0.01,
        learning_rate_critics=0.01,
        critics_jitter=True,
        dissimilarity_eta=0.0,
        cluster_type='kmeans',
        critic_type='Gaussian',
        critics_precision=None,
        min_cluster_size=200,  #num_trees=5,
        eta_hedge=0.16,
        bootstrap_hedge=False,
        l1_reg_weight_modeler=0.0,
        l2_reg_weight_modeler=0.0,
        dnn_layers=hidden_layers,
        dnn_poly_degree=1,
        log_summary=False,
        summary_dir='./graphs_monte')
    dgmm.fit(data_z, data_p, data_y)

    test_min = np.percentile(data_p, 10)
    test_max = np.percentile(data_p, 90)
    test_grid = np.array(
        list(
            itertools.product(np.linspace(test_min, test_max, 100),
                              repeat=num_treatments)))
    print(test_grid.shape)

    test_data_x, _, test_data_treatment, _ = get_data(5 * n_samples,
                                                      num_instruments,
                                                      iv_strength, tau_fn,
                                                      num_features)
    test_data_p = np.concatenate((test_data_treatment, test_data_x), axis=1)
    print(test_data_p.shape)
    clip_edges = (np.all((test_data_p > test_min), axis=1) & np.all(
        (test_data_p < test_max), axis=1)).flatten()
    test_data_p = test_data_p[clip_edges, :]
    test_data_treatment = test_data_treatment[clip_edges, :]
    test_data_x = test_data_x[clip_edges, :]
    print(test_data_p.shape)

    best_fn_grid = dgmm.predict(test_grid, model='best')
    final_fn_grid = dgmm.predict(test_grid, model='final')
    avg_fn_grid = dgmm.predict(test_grid, model='avg')
    best_fn_dist = dgmm.predict(test_data_p, model='best')
    final_fn_dist = dgmm.predict(test_data_p, model='final')
    avg_fn_dist = dgmm.predict(test_data_p, model='avg')

    ##################################
    # Benchmarks
    ##################################
    from sklearn.linear_model import LinearRegression, MultiTaskElasticNet, ElasticNet
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.pipeline import Pipeline
    from sklearn.neural_network import MLPRegressor

    direct_poly = Pipeline([('poly',
                             PolynomialFeatures(degree=degree_benchmarks)),
                            ('linear', LinearRegression())])
    direct_poly.fit(data_p, data_y.flatten())
    direct_poly_fn_grid = direct_poly.predict(test_grid)
    direct_poly_fn_dist = direct_poly.predict(test_data_p)

    direct_nn = MLPRegressor(hidden_layer_sizes=hidden_layers)
    direct_nn.fit(data_p, data_y.flatten())
    direct_nn_fn_grid = direct_nn.predict(test_grid)
    direct_nn_fn_dist = direct_nn.predict(test_data_p)

    plf = PolynomialFeatures(degree=degree_benchmarks)
    sls_poly_first = MultiTaskElasticNet()
    sls_poly_first.fit(plf.fit_transform(data_z), plf.fit_transform(data_p))
    sls_poly_second = ElasticNet()
    sls_poly_second.fit(sls_poly_first.predict(plf.fit_transform(data_z)),
                        data_y)
    sls_poly_fn_grid = sls_poly_second.predict(plf.fit_transform(test_grid))
    sls_poly_fn_dist = sls_poly_second.predict(plf.fit_transform(test_data_p))

    sls_first = LinearRegression()
    sls_first.fit(data_z, data_p)
    sls_second = LinearRegression()
    sls_second.fit(sls_first.predict(data_z), data_y)
    sls_fn_grid = sls_second.predict(test_grid)
    sls_fn_dist = sls_second.predict(test_data_p)

    ######
    # Deep IV
    #####
    # We reset the whole graph
    with tf.name_scope("DeepIV"):
        deep_iv = deep_iv_fit(data_x,
                              data_z,
                              data_treatment,
                              data_y,
                              epochs=10,
                              hidden=hidden_layers)
        deep_iv_fn_grid = deep_iv.predict([test_grid[:, 1], test_grid[:, 0]])
        deep_iv_fn_dist = deep_iv.predict([test_data_x, test_data_treatment])

    plt.figure()
    plot_3d(test_grid, tau_fn(test_grid[:, [1]], test_grid[:, [0]]).flatten())
    plt.savefig(os.path.join(dir, 'true_{}.png'.format(test_id)))

    print(avg_fn_grid.shape)
    plt.figure()
    plot_3d(test_grid, avg_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'avg_fn_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, best_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'best_fn_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, final_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'final_fn_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, deep_iv_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'deep_iv_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, sls_poly_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'sls_poly_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, sls_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'sls_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, direct_poly_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'direct_poly_{}.png'.format(test_id)))

    plt.figure()
    plot_3d(test_grid, direct_nn_fn_grid.flatten())
    plt.savefig(os.path.join(dir, 'direct_nn_{}.png'.format(test_id)))

    def mse_test(y_true, y_pred):
        # Despite the name, this returns an R^2-style score:
        # 1 - MSE(y_pred, y_true) / Var(y_true); higher is better.
        return 1 - np.mean((y_pred.flatten() - y_true.flatten())**2) / np.var(
            y_true.flatten())

    mse_best = mse_test(tau_fn(test_data_x, test_data_treatment), best_fn_dist)
    mse_final = mse_test(tau_fn(test_data_x, test_data_treatment),
                         final_fn_dist)
    mse_avg = mse_test(tau_fn(test_data_x, test_data_treatment), avg_fn_dist)
    mse_2sls_poly = mse_test(tau_fn(test_data_x, test_data_treatment),
                             sls_poly_fn_dist)
    mse_direct_poly = mse_test(tau_fn(test_data_x, test_data_treatment),
                               direct_poly_fn_dist)
    mse_direct_nn = mse_test(tau_fn(test_data_x, test_data_treatment),
                             direct_nn_fn_dist)
    mse_2sls = mse_test(tau_fn(test_data_x, test_data_treatment), sls_fn_dist)
    mse_deep_iv = mse_test(tau_fn(test_data_x, test_data_treatment),
                           deep_iv_fn_dist)

    on_p_dist = [
        mse_best, mse_final, mse_avg, mse_deep_iv, mse_2sls_poly, mse_2sls,
        mse_direct_poly, mse_direct_nn
    ]

    mse_best = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                        best_fn_grid)
    mse_final = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                         final_fn_grid)
    mse_avg = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                       avg_fn_grid)
    mse_2sls_poly = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                             sls_poly_fn_grid)
    mse_direct_poly = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                               direct_poly_fn_grid)
    mse_direct_nn = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                             direct_nn_fn_grid)
    mse_2sls = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                        sls_fn_grid)
    mse_deep_iv = mse_test(tau_fn(test_grid[:, [1]], test_grid[:, [0]]),
                           deep_iv_fn_grid)

    on_p_grid = [
        mse_best, mse_final, mse_avg, mse_deep_iv, mse_2sls_poly, mse_2sls,
        mse_direct_poly, mse_direct_nn
    ]

    return on_p_dist, on_p_grid
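

# Illustrative invocation (not from the original file): the parameter values
# are placeholders chosen only to show the call shape; `dir` must already
# exist because the function saves figures into it.
if __name__ == '__main__':
    scores_dist, scores_grid = test(test_id=0, dir='./results',
                                    strength_scale=0.5, n_samples=4000,
                                    num_features=1, num_instruments=2,
                                    num_treatments=1, num_outcomes=1)
    print('Scores on the treatment distribution:', scores_dist)
    print('Scores on the evaluation grid:', scores_grid)
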
Example no. 5
pwd_stacked = '/home/ov/data/stacked/'
pwd_stacked_out = pwd_stacked + 'nfpat' + str(patient_nr) + scid + '.stacked_icp_abp.npy'
# =============================================================================
# Load waves:
stacked_waves = np.load(pwd_stacked_out)
"""
stacked_waves = load_waves_by_pat_scid(patids, scid, pwd)

waves = utils.acces_waves_by_type(None, stacked_waves, wave_type)
print('Stacked size: {}'.format(stacked_waves.shape))
print('Unstacked {} waves: {}'.format(wave_type, waves.shape))
# =============================================================================
# Fit-Transforming PCA on acquired waves:
fitted = utils.pca_projections(waves, 3, svd_solver='arpack')
print('Fitted PCA: {}'.format(fitted.shape))
"""
xs = fitted[:, 0]
ys = fitted[:, 1]
zs = fitted[:, 2]

utils.plot_3d(xs, ys, zs,
              title='PCA projection of {} waves of PAT{} ({})'.format(wave_type,
                                                                      patient_nr,
                                                                      scid),
              xlabel='PC1',
              ylabel='PC2',
              zlabel='PC3',
              s=1)
"""
# =============================================================================
# Gaussian Mixture Models and, if needed, additional clustering
Example no. 6
                len_accpt_points = math.floor((tot_lenTime)/rq_time_mean)
                remainig_days = days - day + 1
                attr_divide = math.floor(len_points/len_accpt_points)
                 
                if attr_divide==0:
                    n_clusters = 1
                elif math.floor(attr_divide/day)>2:
                    n_clusters = math.floor(attr_divide/day)
                else:
                    n_clusters = 2

                X = np.array(df_city.loc[:,['Id', 'iPlannerRate', 'Lat', 'Long']].values, 
                         dtype=float)
                
                utils.plot_2d(X)
                utils.plot_3d(X)
                random_state = day
                # (cluster_labels, 
                #  cluster_centers, 
                #  max_cluster, 
                #  first_cluster_members) = clustering.kmeans_clsuster(X, n_clusters, 
                #                                                    random_state)
            
                # df_city = df_city.iloc[first_cluster_members,:]
                
                df_city = df_city.iloc[:len_accpt_points,:]
                
                df_city = df_city.sort_values(by='iPlannerRate', ascending=False)    
                
                if len(df_city) <= 6:
                    break
Example no. 7
concatenated_waves = normalize(concatenated_waves, norm='max')

print(concatenated_waves.shape)

# Fit-Transforming PCA on acquired waves:
fitted = utils.pca_projections(concatenated_waves, 3, svd_solver='arpack')
print(fitted.shape)

xs = fitted[:, 0]
ys = fitted[:, 1]
zs = fitted[:, 2]

utils.plot_3d(xs,
              ys,
              zs,
              title='NFPAT{} {} PCA projection'.format(patient_nr, scid),
              xlabel='PC1',
              ylabel='PC2',
              zlabel='PC3',
              s=1)

# Gaussian Mixture Models and, if needed, additional clustering
n_components = int(input('Type the number of components to split into: '))

cluster_means, clusters, ns, ks = utils.gaussian_mixture_pca_projections(
    fitted,
    concatenated_waves,
    n_components,
    patient_nr,
    scid,
    with_mahal=False)
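

# utils.pca_projections is not shown on this page; a plausible minimal sketch
# (an assumption about the helper, not its actual source) is a thin wrapper
# around scikit-learn's PCA:
from sklearn.decomposition import PCA


def pca_projections(waves, n_components, svd_solver='auto'):
    # Project each wave (one row per wave) onto its first n_components
    # principal components; returns an (n_waves, n_components) array.
    return PCA(n_components=n_components,
               svd_solver=svd_solver).fit_transform(waves)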
Example no. 8
concatenated_waves = normalize(concatenated_waves, norm='max')

# Fit-Transforming PCA on acquired waves:
fitted = utils.pca_projections(concatenated_waves, 3, svd_solver='arpack')
print(fitted.shape)

xs = fitted[:, 0]
ys = fitted[:, 1]
zs = fitted[:, 2]

scid = 'nsc'

utils.plot_3d(xs,
              ys,
              zs,
              title='PCA projection ({})'.format(scid),
              xlabel='PC1',
              ylabel='PC2',
              zlabel='PC3',
              s=1)

# Gaussian Mixture Models and, if needed, additional clustering
n_components = int(input('Type the number of components to split into: '))

cluster_means, clusters, ns, ks = utils.gaussian_mixture_pca_projections(
    fitted, concatenated_waves, n_components)
"""
clusters = np.asarray(clusters)

# Cleaning (only if additional clustering was needed):
if ns is not None:
    print('\nCleaning...')
Example no. 9
                                  frame_len=winlen,
                                  frame_step=winstep)
    frames = [window(winlen) * (f - mean) / std for f in frames]

    if frames == []:
        continue
    f_rbm = grelurbm.get_features_v2(
        frames)  # v2 is relu without bias, v3 is mf with bias
    feats_df = feats_df.append(
        {
            "phn": label,
            "category": row["category"],
            "collapsed": row["collapsed"],
            "feats": f_rbm,
            "algth": "rbm_relu",
            "idx": ix
        },
        ignore_index=True)

grelurbm.close_session()
del (grelurbm)

from PrincipalComponentAnalysis import PCA
feats = []
for ix, row in feats_df.iterrows():
    feats.extend(row["feats"])

pca = PCA().compute_pca(feats, 3)

plot_3d(pca, feats_df["collapsed"].values)