if gender == "" or genderdata[userID]["gender"] == gender:
                        x.append(cnt)
                        y.append(weekdata[str(day)][userID]["RW"])
            cnt += 1
        if str(day) in weekdata and str(day) == str(changedate):
            change_cnt = cnt
            print()
            print(
                str(change_cnt) + ": " + str(day) +
                " (excluded week as cut point for treatment)")
            cnt += 1

# Assemble the collected observations into a frame: 'x' holds the running
# counter values and 'y' the matching outcome values gathered above.
data = pd.DataFrame(dict(y=y, x=x))

# Automatic bandwidth selection is disabled; the externally supplied
# `bandwidth` is used instead.
#bandwidth_opt = rdd.optimal_bandwidth(data['y'], data['x'], cut=change_cnt)
#logging.info("Optimal bandwidth:" + str(bandwidth_opt))

# Restrict the data around the cut point (the counter value recorded for the
# change date) using the chosen bandwidth.
data_rdd = rdd.truncated_data(data, 'x', bandwidth, cut=change_cnt)

# Report how many observations each counter value contributes to the model.
print()
print("Number of observations per week in this model: ")
weekly_counts = data_rdd["x"].value_counts()
print(weekly_counts)
print()
print()

# Build the regression-discontinuity model and print its fitted summary.
# The blank-line prints stay interleaved with the calls so the console
# layout is unchanged.
model = rdd.rdd(data_rdd, 'x', 'y', cut=change_cnt)
print()
fitted_model = model.fit()
print(fitted_model.summary())
print()

# Log the wall-clock duration measured from `log_starttime`.
log_endtime = datetime.datetime.now()
log_runtime = log_endtime - log_starttime
logging.info(f"Total runtime: {log_runtime}")
Beispiel #2
0
# TEST optimal_bandwidth()
# The flag is 0 while the check passes and is set to 1 on a mismatch.
flag_optimal_bandwidth = 0

# Bandwidth for a cut point of 1; compared, rounded to 5 decimals, against
# the expected reference value.
h = rdd.optimal_bandwidth(data['y'], data['x'], 1)
h_matches_reference = np.round(h, 5) == .75117
if h_matches_reference:
    print("\tNo Failures")
else:
    flag_optimal_bandwidth = 1
    print("\tFAIL: value of h is wrong")

# TEST truncated_data()
# Truncate the data around the cut point 1 using the bandwidth `h`.
data_rdd = rdd.truncated_data(data, 'x', h, cut=1)

# TEST rdd()
# Build the model on the truncated data, fit it, and show the summary.
model = rdd.rdd(data_rdd, 'x', 'y', cut=1)
fitted_model = model.fit()
fit_summary = fitted_model.summary()

print(fit_summary)

# TEST bin_data()
# Bin the data (called with 100 as the bin-count argument) and draw one
# hollow red marker per bin, sized by the bin's 'n_obs' value.
data_binned = rdd.bin_data(data, 'y', 'x', 100)

marker_style = dict(
    s=data_binned['n_obs'],
    facecolors='none',
    edgecolors='r',
)

plt.figure()
plt.scatter(data_binned['x'], data_binned['y'], **marker_style)
plt.show()
plt.close()
Beispiel #3
0
# Simulation study: for every run, noise level and angle, generate a data set
# and analyse it with both the BNQD analysis and the frequentist `rdd` model.
#
# Resulting layout: results[sim][noise_index][angle_index] is a dict with
#   'bnpqed'  -> table returned by qed.pretty_print(verbose=False)
#   'freq'    -> p-value of the 'TREATED' coefficient of the rdd fit
#   'gp_pval' -> value returned by qed.get_rdd_p_values()
results = []

# The simulation index is never used, so it gets its own throwaway name
# instead of sharing `run` with the per-run result list (the loop variable
# used to be overwritten on the first line of the loop body).
for _sim_index in tqdm(range(num_sims)):
    run = []
    for noise in noise_levels:
        run_noise = []
        for angle in angles:
            x, y, _ = datafun(n=n, b=b, angle=angle, noise_sd=noise)

            # BNQD analysis of the generated sample.
            qed = BNQD.BnpQedAnalysis(x, y, kernel_dict, labelFunc, b=b, opts=opts)
            _ = qed.train()
            results_df = qed.pretty_print(verbose=False)

            # Frequentist regression-discontinuity fit on the same sample.
            data = pd.DataFrame({'y': y, 'x': x})
            rddmodel = rdd.rdd(data, 'x', 'y', cut=b, verbose=False)
            fit = rddmodel.fit()
            rddpval = fit.pvalues['TREATED']
            gp_pvals = qed.get_rdd_p_values()

            simulation = {
                'bnpqed': results_df,
                'freq': rddpval,
                'gp_pval': gp_pvals,
            }
            run_noise.append(simulation)

            # Drop the reference to the analysis object before the next
            # iteration creates a new one.
            del qed
        run.append(run_noise)
    results.append(run)
        
Beispiel #4
0
# Treatment indicator: 1 where the running variable is at or above the
# threshold, 0 otherwise.
treatment = np.where(x >= threshold, 1, 0)

# Two normally distributed covariates (standard deviations 1 and 4). Only w1
# enters the outcome equation below; w2 is just stored in the frame.
w1 = np.random.normal(0, 1, N)
w2 = np.random.normal(0, 4, N)

# Outcome: treatment coefficient 0.5, linear in x, plus the w1 term, an
# intercept of 1 and the noise term epsilon.
y = .5 * treatment + 2 * x - .2 * w1 + 1 + epsilon

data = pd.DataFrame(dict(y=y, x=x, w1=w1, w2=w2))
data.head()  # result is discarded; kept as in the source

# Compute the bandwidth at the threshold and truncate the data with it.
bandwidth_opt = rdd.optimal_bandwidth(data['y'], data['x'], cut=threshold)

print("Optimal bandwidth:", bandwidth_opt)
data_rdd = rdd.truncated_data(data, 'x', bandwidth_opt, cut=threshold)

# x = running variable
# y = outcome variable
model = rdd.rdd(data_rdd, 'x', 'y', cut=threshold)
fitted_model = model.fit()
print(fitted_model.summary())

# Load the RD data set from its Stata file and keep a CSV copy of it.
df_RD = pd.read_stata(
    r'/Users/gopaljuneja/Desktop/Microenterprise_Kenya/113714-V1/App2017-0042_data/datasets/RD_Dataset.dta'
)
df_RD.to_csv('/Users/gopaljuneja/Desktop/Reproduced_MEK/RD_Dataset.csv')

bw100 = 100
bw150 = 150
bw200 = 200
threshold = 1

# TODO: estimate the optimal bandwidth once the outcome column is chosen.
# rdd.optimal_bandwidth() needs the outcome and running-variable series
# (plus an optional cut); calling it without arguments raises TypeError and
# stops the script before the band selections below run.

# Symmetric windows on 'ce_std': |ce_std| <= 1 and |ce_std| <= 1.5.
band100 = df_RD.loc[(df_RD['ce_std'] <= 1) & (df_RD['ce_std'] >= -1)]
band150 = df_RD.loc[(df_RD['ce_std'] <= 1.5) & (df_RD['ce_std'] >= -1.5)]
# Candidate kernels, paired by position with the names in `kernel_names`.
kernels = [linear_kernel, std_periodic_kernel, RBF_kernel]
kernel_dict = {name: kernel for name, kernel in zip(kernel_names, kernels)}

# Options handed to the BNQD analysis.
opts = {
    'num_restarts': 50,
    'mode': 'BIC',
    'verbose': False,
}

# Train the BNQD analysis with `bz` passed as its `b` argument.
qed = BNQD.BnpQedAnalysis(x, y, kernel_dict, labelFunc, b=bz, opts=opts)
qed.train()

results_df = qed.pretty_print()
gp_pvals = qed.get_rdd_p_values()

# Frequentist comparison: fit the rdd model on the same data with the same
# cut and read the p-value of its 'TREATED' coefficient.
rdddata = pd.DataFrame(dict(y=y, x=x))
rddmodel = rdd.rdd(rdddata, 'x', 'y', cut=bz, verbose=False)
fit = rddmodel.fit()
rddpval = fit.pvalues['TREATED']



# Draw the effect-size figure and the model-fit figure (the latter is
# evaluated on x_test).
es_fig, es_axes = qed.plot_effect_sizes()
mf_fig, mf_axes = qed.plot_model_fits(x_test)

# Bottom row only (row index len(kernel_dict) - 1): place a tick at every
# 12th position from 1 up to n, mapped through zscorex, and label the ticks
# with the unique values of data['year'].
bottom_row = mf_axes[len(kernel_dict) - 1, :]
for ax in bottom_row:
    tick_positions = zscorex(np.arange(1, n, step=12))
    ax.set_xticks(tick_positions)
    tick_labels = np.unique(data['year'])
    ax.set_xticklabels(tick_labels)
    ax.set_xlabel('Time')
    
# do for left column
for ax in mf_axes.flatten():