def do_experiment(params):
    seed = Seed(params['random_seed'])
    alpha = params['alpha_level']
    verbose = params['verbose']
    L = params['L']
    S = len(params['n_MC_levels'])
    T = params['is_T']

    # Set up structure and methods for recording results
    results = {'completed_trials': 0}
    for method, display in [('cmle_a', 'CMLE-A'),
                            ('cmle_is', 'CMLE-IS (T = %d)' % T)] + \
                           [('cons_%d' % s, 'Conservative (n = %d)' % n_MC)
                            for s, n_MC in enumerate(params['n_MC_levels'])]:
        results[method] = {'display': display,
                           'in_interval': [],
                           'length': [],
                           'total_time': 0.0}

    def do_and_record(out, name):
        ci, elapsed = out
        ci_l, ci_u = ci
        result = results[name]
        print '%s (%.2f sec): [%.2f, %.2f]' % \
            (result['display'], elapsed, ci_l, ci_u)
        result['in_interval'].append(ci_l <= params['theta'] <= ci_u)
        result['length'].append(ci_u - ci_l)
        result['total_time'] += elapsed

    # Do experiment
    for X, v in generate_data(params, seed):
        if (results['completed_trials'] == params['n_rep']) or terminated:
            break

        theta_grid = np.linspace(params['theta_l'], params['theta_u'], L)

        do_and_record(ci_cmle_a(X, v, theta_grid, alpha), 'cmle_a')
        do_and_record(ci_cmle_is(X, v, theta_grid, alpha, T, verbose),
                      'cmle_is')
        for s, n_MC in enumerate(params['n_MC_levels']):
            do_and_record(ci_conservative(X, v, n_MC, theta_grid, alpha,
                                          verbose),
                          'cons_%d' % s)

        results['completed_trials'] += 1

    # For verifying that same data was generated even if different
    # algorithms consumed a different amount of randomness
    seed.final()

    return results
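# A minimal usage sketch for the driver above (not part of the original
# code): the key names are exactly those read by do_experiment(), but the
# values are hypothetical, and Seed, generate_data, and the ci_* helpers
# are assumed to be imported from the surrounding package.
example_params = {
    'random_seed': 137,        # consumed by Seed() for reproducibility
    'alpha_level': 0.05,       # nominal level of each confidence interval
    'verbose': False,
    'L': 121,                  # number of points in theta_grid
    'theta': 2.0,              # true parameter, used to score coverage
    'theta_l': -6.0,           # lower endpoint of theta_grid
    'theta_u': 6.0,            # upper endpoint of theta_grid
    'is_T': 100,               # importance samples for CMLE-IS
    'n_MC_levels': [10, 100],  # Monte Carlo sizes for the conservative CI
    'n_rep': 100,              # trials to complete before stopping
}
#results = do_experiment(example_params)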
# Assumes module-level imports (json, sys, numpy as np) and the Network /
# StationaryLogistic / NonstationaryLogistic helpers from the surrounding
# package.
def do_experiment(params):
    if params['dump_fits'] and params['load_fits']:
        print 'Warning: simultaneously dumping and loading is a bad idea.'

    if params['dump_fits']:
        fits = []

    if params['load_fits']:
        with open(params['load_fits'], 'r') as fits_file:
            loaded_params_pick, loaded_fits = json.load(fits_file)
        loaded_params = dict([(k, unpick(v)) for (k, v) in loaded_params_pick])
        # Compare on parameters that control data generation and inference
        run_params = ['N', 'B', 'theta_sd', 'theta_fixed',
                      'alpha_unif_sd', 'alpha_norm_sd', 'alpha_gamma_sd',
                      'cov_unif_sd', 'cov_norm_sd', 'cov_disc_sd',
                      'kappa_target', 'pre_offset', 'post_fit',
                      'fit_nonstationary', 'fit_method', 'num_reps',
                      'is_T', 'sampling', 'sub_sizes_r', 'sub_sizes_c',
                      'random_seed']
        for p in run_params:
            if not np.all(loaded_params[p] == params[p]):
                print 'Warning: load mismatch on', p

    # Set random seed for reproducible output
    seed = Seed(params['random_seed'])

    # Initialize full network
    arr = Network(params['N'])

    # Generate node-level propensities to extend and receive edges
    if params['alpha_norm_sd'] > 0.0:
        alpha_norm(arr, params['alpha_norm_sd'])
    elif params['alpha_unif_sd'] > 0.0:
        alpha_unif(arr, params['alpha_unif_sd'])
    elif params['alpha_gamma_sd'] > 0.0:
        # Choosing location somewhat arbitrarily to give unit skewness
        alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
    else:
        alpha_zero(arr)

    # Generate covariates and associated coefficients
    data_model = NonstationaryLogistic()
    covariates = []
    for b in range(params['B']):
        name = 'x_%d' % b
        covariates.append(name)

        if name in params['theta_fixed']:
            data_model.beta[name] = params['theta_fixed'][name]
        else:
            data_model.beta[name] = np.random.normal(0, params['theta_sd'])

        if params['cov_unif_sd'] > 0.0:
            c = np.sqrt(12) / 2
            def f_x(i_1, i_2):
                return np.random.uniform(-c * params['cov_unif_sd'],
                                         c * params['cov_unif_sd'])
        elif params['cov_norm_sd'] > 0.0:
            def f_x(i_1, i_2):
                return np.random.normal(0, params['cov_norm_sd'])
        elif params['cov_disc_sd'] > 0.0:
            def f_x(i_1, i_2):
                return (params['cov_disc_sd'] *
                        (np.sign(np.random.random() - 0.5)))
        else:
            print 'Error: no covariate distribution specified.'
            sys.exit()

        arr.new_edge_covariate(name).from_binary_function_ind(f_x)

    # Generate large network, if necessary
    if params['sampling'] != 'new':
        data_model.match_kappa(arr, params['kappa_target'])
        arr.generate(data_model)

    if params['fit_nonstationary']:
        fit_model = NonstationaryLogistic()
    else:
        fit_model = StationaryLogistic()
    for c in covariates:
        fit_model.beta[c] = None

    # Set up recording of results from experiment
    results = Results(params['sub_sizes_r'], params['sub_sizes_c'],
                      params['num_reps'], interactive=params['interactive'])
    add_array_stats(results)

    if params['plot_sig']:
        from scipy.stats import chi2
        crit = lambda dof: -0.5 * chi2.ppf(0.95, dof)

        umle_f = lambda n, f: f.nll(n, ignore_offset=True)
        umle_d = lambda n, d: d.nll(n, ignore_offset=True)
        umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset=True)
        results.new('UMLE F-N', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_n(n))
        results.new('UMLE F-D', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_d(n, d))

        cmle_a_f = lambda n, f: acnll(n.as_dense(),
                                      np.exp(f.edge_probabilities(n)))
        cmle_a_d = lambda n, d: acnll(n.as_dense(),
                                      np.exp(d.edge_probabilities(n)))
        cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense()))
        results.new('CMLE-A F-N', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n))
        results.new('CMLE-A F-D', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d))

        cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate=True, T=50)
        cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate=True, T=50)
        cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional(
            n, evaluate=True, T=50)
        results.new('CMLE-IS F-N', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n))
        results.new('CMLE-IS F-D', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d))

        c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate=True)
        c_cmle_d = lambda n, d: d.fit_c_conditional(n, evaluate=True)
        c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional(
            n, evaluate=True)
        results.new('C-CMLE F-N', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n))
        results.new('C-CMLE F-D', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d))

        results.new('UMLE sig.', 'dof',
                    lambda M, N, B: crit((M - 1) + (N - 1) + 1 + B))
        results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B))
        results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B))

    if params['sampling'] == 'new':
        results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa)

    def true_est_theta_c(c):
        return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])
    for c in covariates:
        # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
        f_true, f_est = true_est_theta_c(c)
        results.new('True theta_{%s}' % c, 'm', f_true)
        results.new('Est. theta_{%s}' % c, 'm', f_est)

    if params['pre_offset'] or params['post_fit']:
        results.new('# Active', 'n',
                    lambda n: np.isfinite(n.offset.matrix()).sum())
    else:
        results.new('# Active', 'n', lambda n: n.M * n.N)

    if params['fisher_information']:
        def info_theta_c(c):
            def f_info_theta_c(d, f):
                return d.I_inv['theta_{%s}' % c]
            return f_info_theta_c
        for c in covariates:
            results.new('Info theta_{%s}' % c, 'm', info_theta_c(c))

    if params['baseline']:
        def rel_mse_p_ij(n, d, f):
            P = d.edge_probabilities(n)
            return rel_mse(f.edge_probabilities(n), f.baseline(n), P)
        results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij)
        if not (params['pre_offset'] or params['post_fit']):
            def rel_mse_logit_p_ij(n, d, f):
                logit_P = d.edge_probabilities(n, logit=True)
                logit_Q = f.baseline_logit(n)
                return rel_mse(f.edge_probabilities(n, logit=True),
                               logit_Q, logit_P)
            results.new('Rel. MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij)

    if params['fit_method'] in ['convex_opt', 'conditional', 'c_conditional',
                                'irls', 'conditional_is']:
        results.new('Wall time (sec.)', 'm',
                    lambda d, f: f.fit_info['wall_time'])

    if params['fit_method'] in ['convex_opt', 'conditional',
                                'conditional_is']:
        def work(f):
            w = 0
            for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']:
                if work_type in f.fit_info:
                    w += f.fit_info[work_type]
            return w
        results.new('Work', 'm', lambda d, f: work(f))
        results.new('||ET_final - T||_2', 'm',
                    lambda d, f: l2(f.fit_info['grad_nll_final']))

    for sub_size in zip(results.M_sizes, results.N_sizes):
        print 'subnetwork size =', sub_size

        if params['sampling'] == 'new':
            gen = RandomSubnetworks(arr, sub_size)
        else:
            gen = RandomSubnetworks(arr, sub_size,
                                    method=params['sampling'])

        for rep in range(params['num_reps']):
            seed.next()

            sub = gen.sample()

            if params['fisher_information']:
                data_model.fisher_information(sub)

            if params['sampling'] == 'new':
                data_model.match_kappa(sub, params['kappa_target'])
                sub.generate(data_model)

            if params['load_fits']:
                fit, loaded_fits = loaded_fits[0], loaded_fits[1:]
                fit_model.beta = unpick(fit['theta'])
                if params['fix_broken_cmle_is']:
                    for b_n in fit_model.beta:
                        fit_model.beta[b_n] += 0.1474
                if 'alpha' in fit:
                    sub.row_covariates['alpha_out'] = unpick(fit['alpha'])
                if 'beta' in fit:
                    sub.col_covariates['alpha_in'] = unpick(fit['beta'])
                if 'kappa' in fit:
                    fit_model.kappa = fit['kappa']
                if 'offset' in fit:
                    sub.offset = unpick(fit['offset'])
                if 'fit_info' in fit:
                    fit_model.fit_info = unpick(fit['fit_info'])
            else:
                if params['pre_offset']:
                    sub.offset_extremes()

                if params['fit_method'] == 'convex_opt':
                    fit_model.fit_convex_opt(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'irls':
                    fit_model.fit_irls(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'logistic':
                    fit_model.fit_logistic(sub)
                elif params['fit_method'] == 'logistic_l2':
                    fit_model.fit_logistic_l2(sub, prior_precision=1.0)
                elif params['fit_method'] == 'conditional':
                    fit_model.fit_conditional(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'conditional_is':
                    fit_model.fit_conditional(sub, T=params['is_T'],
                                              verbose=params['verbose'])
                elif params['fit_method'] == 'c_conditional':
                    fit_model.fit_c_conditional(sub,
                                                verbose=params['verbose'])
                elif params['fit_method'] == 'composite':
                    fit_model.fit_composite(sub, T=100,
                                            verbose=params['verbose'])
                elif params['fit_method'] == 'brazzale':
                    fit_model.fit_brazzale(sub)
                elif params['fit_method'] == 'saddlepoint':
                    fit_model.fit_saddlepoint(sub)
                elif params['fit_method'] == 'none':
                    pass

                if params['post_fit']:
                    sub.offset_extremes()
                    fit_model.fit_convex_opt(sub, fix_beta=True)

                if params['dump_fits']:
                    fit = {}
                    fit['theta'] = pick(fit_model.beta)
                    if 'alpha_out' in sub.row_covariates:
                        fit['alpha'] = pick(sub.row_covariates['alpha_out'])
                    # Column covariates hold 'alpha_in' (fixing what looks
                    # like a row/column copy-paste slip in the original).
                    if 'alpha_in' in sub.col_covariates:
                        fit['beta'] = pick(sub.col_covariates['alpha_in'])
                    if fit_model.kappa is not None:
                        fit['kappa'] = fit_model.kappa
                    if sub.offset is not None:
                        sub.offset.dirty()
                        fit['offset'] = pick(sub.offset)
                    if fit_model.fit_info is not None:
                        fit['fit_info'] = pick(fit_model.fit_info)
                    fits.append(fit)

            if params['find_good'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err < params['find_good']:
                    print abs_err
                    sub.offset = None
                    fit_model.fit_conditional(sub, T=1000, verbose=True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info
                    f = open('goodmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, {'Y': Y, 'X': X})
                    sys.exit()

            if params['find_bad'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err > params['find_bad']:
                    print abs_err
                    sub.offset = None
                    fit_model.fit_conditional(sub, T=1000, verbose=True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info
                    f = open('badmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, {'Y': Y, 'X': X})
                    sys.exit()

            results.record(sub_size, rep, sub, data_model, fit_model)

            if params['verbose']:
                print

    if params['dump_fits']:
        with open(params['dump_fits'], 'w') as outfile:
            json.dump(([(p, pick(params[p])) for p in params], fits), outfile)

    # Compute beta MSEs
    covariate_naming = []
    for c in covariates:
        mse_name = 'MSE(theta_{%s})' % c
        true_name = 'True theta_{%s}' % c
        est_name = 'Est. theta_{%s}' % c
        results.estimate_mse(mse_name, true_name, est_name)
        covariate_naming.append((c, mse_name, true_name, est_name))

    # Report parameters for the run
    print 'Parameters:'
    for field in params:
        print '%s: %s' % (field, str(params[field]))

    # Should not vary between runs with the same seed and same number
    # of arrays tested
    seed.final()

    results.summary()

    return results, covariate_naming
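# do_experiment() above round-trips fitted objects and params through JSON
# via pick()/unpick(), which are defined elsewhere. A minimal sketch of one
# way such a pair could work, assuming Python 2 and picklable objects (the
# actual helpers may differ):
import base64
import cPickle

def pick(obj):
    # Encode an arbitrary picklable object as a JSON-safe ASCII string.
    return base64.b64encode(cPickle.dumps(obj))

def unpick(s):
    # Invert pick(): decode the base64 payload and unpickle it.
    return cPickle.loads(base64.b64decode(str(s)))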
def do_experiment(params):
    seed = Seed(params['random_seed'])
    alpha_level = params['alpha_level']
    verbose = params['verbose']
    L = params['L']
    S = len(params['n_MC_levels'])
    T = params['is_T']

    # Set up structure and methods for recording results
    results = {'completed_trials': 0}
    for method, disp in [#('umle_wald', 'UMLE Wald'),
                         #('umle_boot', 'UMLE bootstrap (pivotal)'),
                         #('cmle_wald', 'CMLE Wald'),
                         #('cmle_boot', 'CMLE bootstrap (pivotal)'),
                         #('brazzale', 'Conditional (Brazzale)'),
                         #('umle', 'UMLE LR'),
                         #('cmle_a', 'CMLE-A LR'),
                         #('cmle_is', 'CMLE-IS (T = %d) LR' % T)
                         ] + \
                        [('is_sc_c_%d' % n_MC, 'IS-score (n = %d)' % n_MC)
                         for n_MC in params['n_MC_levels']] + \
                        [('is_lr_c_%d' % n_MC, 'IS-LR (n = %d)' % n_MC)
                         for n_MC in params['n_MC_levels']]:
        #[('is_sc_u_%d' % n_MC, 'IS-score [un] (n = %d)' % n_MC)
        # for n_MC in params['n_MC_levels']] + \
        #[('is_lr_u_%d' % n_MC, 'IS-LR [un] (n = %d)' % n_MC)
        # for n_MC in params['n_MC_levels']] + \
        results[method] = {'display': disp,
                           'in_interval': [],
                           'length': [],
                           'total_time': 0.0}

    def do(out, name):
        ci, elapsed = out
        ci_l, ci_u = ci
        result = results[name]
        print '%s (%.2f sec): [%.2f, %.2f]' % \
            (result['display'], elapsed, ci_l, ci_u)
        result['in_interval'].append(ci_l <= params['theta'] <= ci_u)
        result['length'].append(ci_u - ci_l)
        result['total_time'] += elapsed

    # Do experiment
    for X, v in generate_data(params['case'], params['theta'], seed):
        if (results['completed_trials'] == params['n_rep']) or terminated:
            break

        theta_grid = np.linspace(params['theta_l'], params['theta_u'], L)

        #do(ci_umle_wald(X, v, alpha_level), 'umle_wald')
        #do(ci_umle_boot(X, v, alpha_level), 'umle_boot')
        #do(ci_cmle_wald(X, v, alpha_level), 'cmle_wald')
        #do(ci_cmle_boot(X, v, alpha_level), 'cmle_boot')
        #do(ci_brazzale(X, v, alpha_level), 'brazzale')
        #do(ci_umle(X, v, theta_grid, alpha_level), 'umle')
        #do(ci_cmle_a(X, v, theta_grid, alpha_level), 'cmle_a')
        #do(ci_cmle_is(X, v, theta_grid, alpha_level, T, verbose), 'cmle_is')

        for n_MC in params['n_MC_levels']:
            for test in ['lr', 'score']:
                for corrected_str, corrected in [('c', True)]: #, ('u', False)]:
                    do(ci_cons(X, v, alpha_level, params['L'],
                               params['theta_l'], params['theta_u'], n_MC,
                               test=test, corrected=corrected,
                               verbose=verbose),
                       'is_%s_%s_%d' % (test[0:2], corrected_str, n_MC))

        results['completed_trials'] += 1

    # For verifying that same data was generated even if different
    # algorithms consumed a different amount of randomness
    seed.final()

    return results
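# The results dict built by the drivers above maps each method key to
# per-trial records. A hypothetical post-processing helper (not part of the
# original code) showing how empirical coverage, mean CI length, and time
# per trial could be read back out of that structure:
def summarize_results(results):
    n = results['completed_trials']
    for key in results:
        if key == 'completed_trials':
            continue
        r = results[key]
        coverage = np.mean(r['in_interval'])  # fraction of CIs covering theta
        mean_length = np.mean(r['length'])    # average interval width
        sec_per_trial = r['total_time'] / max(n, 1)
        print '%s: coverage %.3f, mean length %.3f, %.2f sec/trial' % \
            (r['display'], coverage, mean_length, sec_per_trial)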
def do_experiment(params): seed = Seed(params["random_seed"]) alpha_level = params["alpha_level"] verbose = params["verbose"] L = params["L"] S = len(params["n_MC_levels"]) T = params["is_T"] # Set up structure and methods for recording results results = {"completed_trials": 0} for method, disp in ( [ # ('umle_wald', 'UMLE Wald'), # ('umle_boot', 'UMLE bootstrap (pivotal)'), # ('cmle_wald', 'CMLE Wald'), # ('cmle_boot', 'CMLE bootstrap (pivotal)'), # ('brazzale', 'Conditional (Brazzale)'), ("umle", "UMLE LR"), ("cmle_a", "CMLE-A LR"), ("cmle_is", "CMLE-IS (T = %d) LR" % T), ] + [("is_sc_c_%d" % n_MC, "IS-score (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]] + [("is_sc_u_%d" % n_MC, "IS-score [un] (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]] + [("is_lr_c_%d" % n_MC, "IS-LR (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]] + [("is_lr_u_%d" % n_MC, "IS-LR [un] (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]] ): results[method] = {"display": disp, "in_interval": [], "length": [], "total_time": 0.0} def do(out, name): ci, elapsed = out ci_l, ci_u = ci result = results[name] print "%s (%.2f sec): [%.2f, %.2f]" % (result["display"], elapsed, ci_l, ci_u) result["in_interval"].append(ci_l <= params["theta"] <= ci_u) result["length"].append(ci_u - ci_l) result["total_time"] += elapsed # Do experiment for X, v in generate_data(params["case"], params["theta"], seed): if (results["completed_trials"] == params["n_rep"]) or terminated: break theta_grid = np.linspace(params["theta_l"], params["theta_u"], L) # do(ci_umle_wald(X, v, alpha_level), 'umle_wald') # do(ci_umle_boot(X, v, alpha_level), 'umle_boot') # do(ci_cmle_wald(X, v, alpha_level), 'cmle_wald') # do(ci_cmle_boot(X, v, alpha_level), 'cmle_boot') # do(ci_brazzale(X, v, alpha_level), 'brazzale') do(ci_umle(X, v, theta_grid, alpha_level), "umle") do(ci_cmle_a(X, v, theta_grid, alpha_level), "cmle_a") do(ci_cmle_is(X, v, theta_grid, alpha_level, T, verbose), "cmle_is") for n_MC in params["n_MC_levels"]: for test in ["lr", "score"]: for corrected_str, corrected in [("c", True), ("u", False)]: do( ci_cons( X, v, alpha_level, params["L"], params["theta_l"], params["theta_u"], n_MC, test=test, corrected=corrected, verbose=verbose, ), "is_%s_%s_%d" % (test[0:2], corrected_str, n_MC), ) results["completed_trials"] += 1 # For verifying that same data was generated even if different # algorithms consumed a different amount of randomness seed.final() return results