def ci_cmle_boot(X, v, alpha_level):
    """Pivotal bootstrap CI for the 'x_0' coefficient under the conditional MLE.

    Builds an array from (X, v), records the observed row/column sums as
    margin covariates, and fits a StationaryLogistic model wrapped in
    FixedMargins, with fitting routed through the base model's conditional
    MLE. Returns the pivotal bootstrap confidence interval for 'x_0' at
    the requested alpha level.
    """
    data = array_from_data(X, [v])
    dense = data.as_dense()
    row_margins, col_margins = dense.sum(1), dense.sum(0)

    # Stationary logistic base model with a free 'x_0' coefficient,
    # conditioned on the margins via the FixedMargins wrapper.
    base = StationaryLogistic()
    base.beta['x_0'] = None
    margins_model = FixedMargins(base)

    # Store the observed margins so FixedMargins can condition on them.
    data.new_row_covariate('r', np.int)[:] = row_margins
    data.new_col_covariate('c', np.int)[:] = col_margins

    # Make confidence_boot refit with the conditional MLE rather than the
    # wrapper's default fit.
    margins_model.fit = margins_model.base_model.fit_conditional
    margins_model.confidence_boot(data, alpha_level = alpha_level)
    return margins_model.conf['x_0']['pivotal']
return params['cov_mult'] * np.sqrt(3) else: if (i_1 - i_2 + 1 - m) % N == 0: return params['cov_mult'] * np.sqrt(3) return 0 else: print 'Unrecognized covariate structure.' import sys; sys.exit() net.new_edge_covariate(name).from_binary_function_ind(f_x) # Specify data model as generation permuation networks net.new_node_covariate_int('r')[:] = 1 net.new_node_covariate_int('c')[:] = 1 data_model = FixedMargins(data_model, 'r', 'c') coverage_levels = np.append(0.0, np.cumsum(params['coverage_increments'])) traces = { 'wall_time': [], 'nll': [] } for rep in range(params['num_reps']): net.generate(data_model, arbitrary_init = params['arb_init']) wall_time_trace = [net.gen_info['wall_time']] nll_trace = [data_model.nll(net)] for coverage_inc in params['coverage_increments']: data_model.gibbs_improve_perm(net, net.adjacency_matrix(), coverage_inc) wall_time_trace.append(net.gen_info['wall_time'])
print 'NLL: %.2f' % c_model.nll(net) print 'kappa: %.2f' % c_model.kappa for cov_name in cov_names: print '%s: %.2f' % (cov_name, c_model.beta[cov_name]) print # Sample typical networks from fit models n_samples = 100 s_samples = np.empty((n_samples, net.N, net.N)) ns_samples = np.empty((n_samples, net.N, net.N)) c_samples = np.empty((n_samples, net.N, net.N)) r, c = A.sum(1), A.sum(0) for rep in range(n_samples): s_samples[rep, :, :] = s_model.generate(net) ns_samples[rep, :, :] = ns_model.generate(net) c_samples[rep, :, :] = FixedMargins(c_model).generate(net) # Calculate sample means and variances s_samples_mean = np.mean(s_samples, axis=0) s_samples_sd = np.sqrt(np.var(s_samples, axis=0)) ns_samples_mean = np.mean(ns_samples, axis=0) ns_samples_sd = np.sqrt(np.var(ns_samples, axis=0)) c_samples_mean = np.mean(c_samples, axis=0) c_samples_sd = np.sqrt(np.var(c_samples, axis=0)) # Finish plotting plt.subplot(334) plt.title('Stationary') heatmap(s_samples_mean) plt.subplot(337) residuals(s_samples_mean, s_samples_sd)
def f_x(i_1, i_2): return np.random.normal(0, params['cov_norm_sd']) elif params['cov_disc_sd'] > 0.0: def f_x(i_1, i_2): return (params['cov_disc_sd'] * (np.sign(np.random.random() - 0.5))) else: print 'Error: no covariate distribution specified.' sys.exit() net.new_edge_covariate(name).from_binary_function_ind(f_x) # Specify data model as generation of permuation networks net.new_node_covariate_int('r')[:] = 1 net.new_node_covariate_int('c')[:] = 1 data_model = FixedMargins(data_model, 'r', 'c', coverage = 2.0) if params['fit_nonstationary']: fit_model = NonstationaryLogistic() else: fit_model = StationaryLogistic() for b in data_model.base_model.beta: fit_model.beta[b] = 0.0 # Set up recording of results from experiment results = Results(params['sub_sizes'], params['sub_sizes'], params['num_reps']) add_array_stats(results) def true_est_theta_b(b): return (lambda d, f: d.base_model.beta[b]), (lambda d, f: f.beta[b]) for b in fit_model.beta: # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
print print 'Fitting nonstationary model' alpha_zero(net) ns_model = NonstationaryLogistic() for cov_name in cov_names: ns_model.beta[cov_name] = None ns_model.fit(net, verbose = True) print 'NLL: %.2f' % ns_model.nll(net) print 'kappa: %.2f' % ns_model.kappa for cov_name in cov_names: print '%s: %.2f' % (cov_name, ns_model.beta[cov_name]) print print 'Fitting conditional model' c_model = FixedMargins(StationaryLogistic()) for cov_name in cov_names: c_model.base_model.beta[cov_name] = None c_model.base_model.fit_conditional(net, verbose = True) print 'NLL: %.2f' % c_model.nll(net) for cov_name in cov_names: print '%s: %.2f' % (cov_name, c_model.base_model.beta[cov_name]) print # Sample typical networks from fit models reps = 100 s_samples = np.empty((reps, net.N, net.N)) ns_samples = np.empty((reps, net.N, net.N)) c_samples = np.empty((reps, net.N, net.N)) for rep in range(reps): s_samples[rep,:,:] = s_model.generate(net)
# Generate Bernoulli probabilities from logistic regression model logit_P = np.zeros((M,N)) + kappa logit_P += alpha logit_P += beta logit_P += theta * v logit_P += offset if conditional_sample: arr = Array(M, N) arr.new_edge_covariate('x_0')[:] = logit_P arr.new_row_covariate('r', dtype = np.int)[:] = r arr.new_col_covariate('c', dtype = np.int)[:] = c base_model = StationaryLogistic() base_model.beta['x_0'] = 1.0 data_model = FixedMargins(base_model) while True: # Advance random seed for data generation seed.next() # Generate data for this trial if conditional_sample: X = data_model.generate(arr, coverage = 100.0) else: P = 1.0 / (1.0 + np.exp(-logit_P)) X = np.random.random((M,N)) < P yield X, v def timing(func):
ns_fit.confidence_wald(new) wn_ci_l, wn_ci_u = safe_ci(ns_fit, 'x_0', 'wald') if wn_ci_l < theta < wn_ci_u: wn_covered += 1 ns_fit.confidence_boot(new, n_bootstrap = n_boot, alpha_level = alpha_level) bn_ci_l, bn_ci_u = ns_fit.conf['x_0']['pivotal'] if bn_ci_l < theta < bn_ci_u: bn_covered += 1 A = new.as_dense() r = A.sum(1) c = A.sum(0) c_fit = FixedMargins(s_fit) new.new_row_covariate('r', np.int)[:] = r new.new_col_covariate('c', np.int)[:] = c c_fit.fit = c_fit.base_model.fit_conditional c_fit.reset_confidence() c_fit.confidence_wald(new) wc_ci_l, wc_ci_u = safe_ci(c_fit, 'x_0', 'wald') if wc_ci_l < theta < wc_ci_u: wc_covered += 1 c_fit.confidence_boot(new, n_bootstrap = n_boot, alpha_level = alpha_level) bc_ci_l, bc_ci_u = c_fit.conf['x_0']['pivotal'] if bc_ci_l < theta < bc_ci_u: bc_covered += 1