def ci_umle_wald(X, v, alpha_level):
    """Wald-type CI for theta_0 from the unconditional MLE.

    Uses the non-strict (inverse-Fisher) Wald interval at level alpha_level.
    """
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)
    model = NonstationaryLogistic()
    model.beta['x_0'] = None
    model.confidence_wald(data, strict = False, alpha_level = alpha_level)
    return safe_ci(model, 'x_0', 'wald_inverse')
def ci_umle_wald(X, v, alpha_level):
    """Wald-type CI for theta_0 from the unconditional MLE (default Wald)."""
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)
    model = NonstationaryLogistic()
    model.beta["x_0"] = None
    model.confidence_wald(data, alpha_level=alpha_level)
    return safe_ci(model, "x_0", "wald")
def ci_umle_boot(X, v, alpha_level):
    """Pivotal bootstrap CI for theta_0 from the unconditional MLE."""
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)
    model = NonstationaryLogistic()
    model.beta['x_0'] = None
    model.confidence_boot(data, alpha_level = alpha_level)
    return model.conf['x_0']['pivotal']
def ci_brazzale(X, v, alpha_level):
    """CI for theta_0 via the Brazzale higher-order-asymptotics fit."""
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)
    model = NonstationaryLogistic()
    model.beta['x_0'] = None
    model.fit_brazzale(data, 'x_0', alpha_level = alpha_level)
    return safe_ci(model, 'x_0', 'brazzale')
def ci_umle(X, v, theta_grid, alpha_level):
    # Profile-likelihood CI for theta_0: evaluate the unconditional
    # log-likelihood at each grid value of theta and invert the LR test.
    arr = array_from_data(X, [v])
    arr.offset_extremes()
    alpha_zero(arr)
    fit_model = NonstationaryLogistic()
    umle = np.empty_like(theta_grid)
    for l, theta_l in enumerate(theta_grid):
        # Fix theta at the grid value; profile out the remaining parameters.
        fit_model.beta['x_0'] = theta_l
        fit_model.fit(arr, fix_beta = True)
        umle[l] = -fit_model.nll(arr)
    # Chi-square(1) critical value mapped onto the log-likelihood scale.
    crit = -0.5 * chi2.ppf(1 - alpha_level, 1)
    ci = invert_test(theta_grid, umle - umle.max(), crit)
    # NOTE(review): the mangled source leaves the extent of the `if` suite
    # ambiguous; only the plotting call is treated as conditional here, so
    # coverage bookkeeping always runs — confirm against the original file.
    if params['plot']:
        plot_statistics(ax_umle, theta_grid, umle - umle.max(), crit)
    umle_coverage_data['cis'].append(ci)
    umle_coverage_data['theta_grid'] = theta_grid
    umle_coverage_data['crit'] = crit
    return ci
# Grid of candidate theta values for the conditional-likelihood evaluation.
theta_grid_max = 3.0
theta_grid_G = 121

def cond_a_nll(X, w):
    # Conditional NLL wrapper; sorting by w_opt variance is fixed on.
    return cond_a_nll_b(X, w, sort_by_wopt_var = True)

def cond_a_sample(r, c, w, T = 0):
    # Conditional sampler wrapper with the same sorting convention.
    return cond_a_sample_b(r, c, w, T, sort_by_wopt_var = True)

# Rejection loop: regenerate data until the conditional fit's absolute
# error on theta exceeds min_error, i.e., until a "bad" example is found.
while True:
    a = Array(M, N)
    alpha_norm(a, 1.0)
    a.new_edge_covariate('x')[:,:] = np.random.normal(0, 1, (M, N))
    d = NonstationaryLogistic()
    d.beta['x'] = theta
    d.match_kappa(a, kappa_target)
    a.generate(d)
    f = NonstationaryLogistic()
    f.beta['x'] = None
    f.fit_conditional(a, T = T_fit, verbose = True)
    abs_err = abs(f.beta['x'] - d.beta['x'])
    if abs_err > min_error:
        print f.beta['x']
        break

theta_vec = np.linspace(theta_grid_min, theta_grid_max, theta_grid_G)
'cov_structure': 'multimodal_4_cycles', 'cov_mult': 2.0, 'num_reps': 10, 'coverage_increments': [0.01]*10 + [0.1]*10 + [0.2]*10 + [0.5]*10, 'arb_init': False } # Set random seed for reproducible output np.random.seed(137) # Initialize full network net = Network(params['N']) alpha_zero(net) # Generate covariates and associated coefficients data_model = NonstationaryLogistic() covariates = [] for b in range(params['B']): name = 'x_%d' % b covariates.append(name) if name in params['theta_fixed']: data_model.beta[name] = params['theta_fixed'][name] else: data_model.beta[name] = np.random.normal(0, params['theta_sd']) if params['cov_structure'] == 'none': def f_x(i_1, i_2): return np.random.uniform(-np.sqrt(3), np.sqrt(3)) elif params['cov_structure'] == 'unimodal': def f_x(i_1, i_2):
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(5)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)
    # Each covariate is a similarity indicator on a fresh node-level draw;
    # f_x is consumed immediately, so the closure over x_node is safe.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']

# Initialize the fit model; specify which covariates it should have terms for
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 100
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -1.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)
    # Covariate is a similarity indicator on node-level draws; f_x is
    # consumed immediately, so the closure over x_node is safe.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.6
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']

# Initialize the fit model; specify which covariates it should have terms for
def cond_a_nll(X, w):
    # Conditional NLL wrapper; sorting by w_opt variance is fixed on.
    return cond_a_nll_b(X, w, sort_by_wopt_var=True)

def cond_a_sample(r, c, w, T=0):
    # Conditional sampler wrapper with the same sorting convention.
    return cond_a_sample_b(r, c, w, T, sort_by_wopt_var=True)

# Rejection loop: regenerate data until the conditional fit's absolute
# error on theta exceeds min_error, i.e., until a "bad" example is found.
while True:
    a = Array(M, N)
    alpha_norm(a, 1.0)
    a.new_edge_covariate('x')[:, :] = np.random.normal(0, 1, (M, N))
    d = NonstationaryLogistic()
    d.beta['x'] = theta
    d.match_kappa(a, kappa_target)
    a.generate(d)
    f = NonstationaryLogistic()
    f.beta['x'] = None
    f.fit_conditional(a, T=T_fit, verbose=True)
    abs_err = abs(f.beta['x'] - d.beta['x'])
    if abs_err > min_error:
        print f.beta['x']
        break

theta_vec = np.linspace(theta_grid_min, theta_grid_max, theta_grid_G)
init_latex_rendering() # Parameters N = 25 G = 20 alpha_sd = 2.0 theta_true = { 'x_1': 2.0, 'x_2': -1.0 } target_degree = 2 # Setup network net = Network(N) alpha_norm(net, alpha_sd) # Setup data model and network covariates data_model = NonstationaryLogistic() covariates = [] for name in theta_true: covariates.append(name) data_model.beta[name] = theta_true[name] def f_x(i_1, i_2): return np.random.normal(0, 1.0) net.new_edge_covariate(name).from_binary_function_ind(f_x) # Instantiate network according to data model data_model.match_kappa(net, ('row_sum', target_degree)) net.generate(data_model) #net.show_heatmap(order_by_row = 'alpha_out') #net.show_heatmap(order_by_col = 'alpha_in')
print '%s: %.2f' % (cov_name, c_model.base_model.beta[cov_name]) print for rep in range(params['n_samples']): c_samples[rep,:,:] = c_model.generate(net, coverage = 0.1) c_model.confidence_boot(net, n_bootstrap = params['n_bootstrap']) c_model.confidence_wald(net) for cov_name in cov_names: c_model.confidence_cons(net, cov_name, L = 121, test = 'score') c_model.confidence_cons(net, cov_name, L = 121, test = 'lr') display_cis(c_model) # Offset extreme substructure only for Nonstationary model net.offset_extremes() print 'Fitting nonstationary model' ns_model = NonstationaryLogistic() for cov_name in cov_names: ns_model.beta[cov_name] = None ns_model.fit(net) print 'NLL: %.2f' % ns_model.nll(net) print 'kappa: %.2f' % ns_model.kappa for cov_name in cov_names: print '%s: %.2f' % (cov_name, ns_model.beta[cov_name]) print for rep in range(params['n_samples']): ns_samples[rep,:,:] = ns_model.generate(net) ns_model.confidence_boot(net, n_bootstrap = params['n_bootstrap']) ns_model.confidence_wald(net) display_cis(ns_model) # Calculate sample means and variances
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)
    # Covariate is a similarity indicator on node-level draws; f_x is
    # consumed immediately, so the closure over x_node is safe.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']

# Initialize the fit model; specify which covariates it should have terms for
# Experiment parameters
N = 50
D = 1
theta = 2.0
kappa_target = ('row_sum', 2)
alpha_sd = 2.0
n_rep = 100
n_boot = 10
alpha_level = 0.05

# Build the network with D standard-normal edge covariates.
net = Network(N)
alpha_norm(net, alpha_sd)
for d in range(D):
    net.new_edge_covariate('x_%d' % d)[:,:] = np.random.normal(0, 1, (N, N))
data_model = NonstationaryLogistic()
for d in range(D):
    data_model.beta['x_%d' % d] = np.random.normal(0, 1)
# Pin the coefficient of interest to the known value theta.
data_model.beta['x_0'] = theta
data_model.match_kappa(net, kappa_target)

# Fit models: stationary and nonstationary, all covariates free.
s_fit = StationaryLogistic()
ns_fit = NonstationaryLogistic()
for d in range(D):
    s_fit.beta['x_%d' % d] = None
    ns_fit.beta['x_%d' % d] = None

def safe_ci(model, name, method):
    # Look up a CI computed by `method` for coefficient `name`;
    # falls through (implicitly returning None) when absent —
    # TODO(review): confirm no further fallback follows in the original file.
    if name in model.conf:
        if method in model.conf[name]:
            return model.conf[name][method]
import numpy as np import matplotlib.pyplot as plt from Network import Network from Models import StationaryLogistic, NonstationaryLogistic, alpha_unif from Experiment import RandomSubnetworks from Utility import draw_confidence # Initialize full network N = 300 sub_N = 100 net = Network(N) alpha_unif(net, 0.5) # Initialize the data model; generate covariates and associated coefficients data_model = NonstationaryLogistic() data_model.kappa = -7.0 covariates = ['x_1', 'x_2', 'x_3', 'x_4', 'x_5'] for covariate in covariates: data_model.beta[covariate] = np.random.normal(0, 1.0) x_node = np.random.normal(0, 1.0, N) def f_x(i_1, i_2): return abs(x_node[i_1] - x_node[i_2]) < 0.3 net.new_edge_covariate(covariate).from_binary_function_ind(f_x) net.generate(data_model) print 'True beta_1: %.2f' % data_model.beta['x_1'] # Initialize the fit model; specify which covariates it should have terms for fit_model = StationaryLogistic() for covariate in covariates:
for cov_name in cov_names: fit_model.beta[cov_name] = None fit_model.fit(net, verbose=params['verbose']) print 'NLL: %.2f' % fit_model.nll(net) print 'kappa: %.2f' % fit_model.kappa if use_covs: for cov_name in cov_names: print '%s: %.2f' % (cov_name, fit_model.beta[cov_name]) print '\n' fit_and_summarize('Stationary', Stationary(), False) fit_and_summarize('Stationary', StationaryLogistic(), True) if params['offset_extremes']: print 'Detecting subnetworks associated with infinite parameter estimates.\n' net.offset_extremes() if params['plot']: net.show_offset('pub_date') fit_and_summarize('Nonstationary', NonstationaryLogistic(), False) fit_and_summarize('Nonstationary', NonstationaryLogistic(), True) # Redisplay heatmap, ordered by estimated alphas from last fit, i.e., # NonstationaryLogistic with publication date difference covariates # XX: Following plots are broken #if params['plot']: # net.show_heatmap('alpha_out') # net.show_heatmap('alpha_in') outfile = open('scratch.json', 'w') outfile.write(dump_to_json(net)) outfile.close()
net.new_node_covariate('low_degree').from_pairs(net.names, degree < med_degree) for v_1, v_2, name in [(0, 0, 'high_to_high'), (1, 1, 'low_to_low'), (0, 1, 'high_to_low')]: covariates.append(name) def f_x(i_1, i_2): return ((net.node_covariates['low_degree'][i_1] == v_1) and (net.node_covariates['low_degree'][i_2] == v_2)) net.new_edge_covariate(name).from_binary_function_ind(f_x) # Initialize fitting model fit_model = StationaryLogistic() n_fit_model = NonstationaryLogistic() for c in covariates: fit_model.beta[c] = None n_fit_model.beta[c] = None # Set up recording of results from experiment results = Results(params['sub_sizes'], params['num_reps'], 'Stationary fit') add_network_stats(results) def est_theta_c(c): return lambda d, f: f.beta[c] for c in covariates: f_est = est_theta_c(c) results.new('%s' % c, 'm', f_est) all_results = {} if params['fit_stationary']:
data_model = StationaryLogistic() covariates = [] data_model.beta = {} for b in range(params['B']): name = 'x_%d' % b covariates.append(name) data_model.beta[name] = np.random.normal(0, params['beta_sd']) x_node = np.random.normal(0, 1, params['N']) def f_x(i_1, i_2): return abs(x_node[i_1] - x_node[i_2]) < params['x_diff_cutoff'] net.new_edge_covariate(name).from_binary_function_ind(f_x) if params['fit_nonstationary']: fit_model = NonstationaryLogistic() else: fit_model = StationaryLogistic() for c in covariates: fit_model.beta[c] = None # Set up recording of results from experiment results = Results(params['sub_sizes'], params['sub_sizes'], params['num_reps']) add_array_stats(results) def f_c(c): return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c]) for c in covariates: # Need to do this hackily to avoid for-loop/lambda-binding weirdness. f_true, f_estimated = f_c(c) results.new('True beta_{%s}' % c, 'm', f_true)
# Initialize full network net = Network(params['N']) # Generate node-level propensities to extend and receive edges if params['alpha_norm_sd'] > 0.0: alpha_norm(net, params['alpha_norm_sd']) elif params['alpha_unif'] > 0.0: alpha_unif(net, params['alpha_unif']) elif params['alpha_gamma_sd'] > 0.0: # Choosing location somewhat arbitrarily to give unit skewness alpha_gamma(net, 4.0, params['alpha_gamma_sd']) else: alpha_zero(net) # Generate covariates and associated coefficients data_base_model = NonstationaryLogistic() covariates = [] for b in range(params['B']): name = 'x_%d' % b covariates.append(name) data_base_model.beta[name] = np.random.normal(0, params['beta_sd']) def f_x(i_1, i_2): return np.random.uniform(-np.sqrt(3), np.sqrt(3)) net.new_edge_covariate(name).from_binary_function_ind(f_x) # Initialize data (block)model from base model class_probs = np.random.dirichlet(np.repeat(params['class_conc'], params['K'])) z = np.where(np.random.multinomial(1, class_probs, params['N']) == 1)[1]
# Report the stationary fit, sample from it, and bootstrap pivotal CIs.
print 'kappa: %.2f' % s_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, s_model.beta[cov_name])
print
for rep in range(params['n_samples']):
    s_samples[rep, :, :] = s_model.generate(net)
s_model.confidence(net, n_bootstrap=params['n_bootstrap'])
print 'Pivotal:'
for cov_name in cov_names:
    ci = s_model.conf[cov_name]['pivotal']
    print ' %s: (%.2f, %.2f)' % (cov_name, ci[0], ci[1])
print

# Repeat with the nonstationary model (degree heterogeneity reset first).
print 'Fitting nonstationary model'
alpha_zero(net)
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print
for rep in range(params['n_samples']):
    ns_samples[rep, :, :] = ns_model.generate(net)
ns_model.confidence(net, n_bootstrap=params['n_bootstrap'])
print 'Pivotal:'
for cov_name in cov_names:
    ci = ns_model.conf[cov_name]['pivotal']
    print ' %s: (%.2f, %.2f)' % (cov_name, ci[0], ci[1])
data_model.beta = {} for b in range(params['B']): name = 'x_%d' % b covariates.append(name) data_model.beta[name] = np.random.normal(0, params['beta_sd']) x_node = np.random.normal(0, 1, params['N']) def f_x(i_1, i_2): return abs(x_node[i_1] - x_node[i_2]) < params['x_diff_cutoff'] net.new_edge_covariate(name).from_binary_function_ind(f_x) if params['fit_nonstationary']: fit_model = NonstationaryLogistic() else: fit_model = StationaryLogistic() for c in covariates: fit_model.beta[c] = None # Set up recording of results from experiment results = Results(params['sub_sizes'], params['sub_sizes'], params['num_reps']) add_array_stats(results) def f_c(c): return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c]) for c in covariates:
# Initialize array arr = Array(params['M'], params['N']) # Generate node-level propensities to extend and receive edges if params['alpha_norm_sd'] > 0.0: alpha_norm(arr, params['alpha_norm_sd']) elif params['alpha_unif_sd'] > 0.0: alpha_unif(arr, params['alpha_unif_sd']) elif params['alpha_gamma_sd'] > 0.0: # Choosing location somewhat arbitrarily to give unit skewness alpha_gamma(arr, 4.0, params['alpha_gamma_sd']) else: alpha_zero(arr) # Generate covariates and associated coefficients data_model = NonstationaryLogistic() covariates = [] for b in range(params['B']): name = 'x_%d' % b covariates.append(name) if name in params['beta_fixed']: data_model.beta[name] = params['beta_fixed'][name] else: data_model.beta[name] = np.random.normal(0, params['beta_sd']) def f_x(i_1, i_2): return np.random.uniform(-np.sqrt(3), np.sqrt(3)) arr.new_edge_covariate(name).from_binary_function_ind(f_x) data_model.match_kappa(arr, params['kappa_target'])
import numpy as np import matplotlib.pyplot as plt from Network import Network from Models import NonstationaryLogistic from Models import alpha_zero, alpha_norm, alpha_gamma, alpha_unif from Experiment import RandomSubnetworks # Parameters N = 300 reps = 10 sub_sizes = range(10, 110, 10) kappa_target = ('row_sum', 2) net = Network(N) model = NonstationaryLogistic() num_sizes = len(sub_sizes) data_none = np.empty((num_sizes, reps)) data_het = np.empty((3, 3, num_sizes, reps)) for i, degree_het in enumerate(['Normal', 'Gamma', 'Uniform', 'None']): if degree_het == 'None': alpha_zero(net) for j, het_sd in enumerate([1.0, 2.0, 3.0, 0.0]): if degree_het == 'None' and het_sd != 0.0: continue if degree_het != 'None' and het_sd == 0.0: continue if degree_het == 'Normal': alpha_norm(net, het_sd) if degree_het == 'Gamma': alpha_gamma(net, 4.0, het_sd)
nx.draw(graph, pos, node_size = 10, with_labels = False)

# Fit and report the three model variants in turn.
print 'Fitting stationary model'
s_model = StationaryLogistic()
for cov_name in cov_names:
    s_model.beta[cov_name] = None
s_model.fit(net, verbose = True)
print 'NLL: %.2f' % s_model.nll(net)
print 'kappa: %.2f' % s_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, s_model.beta[cov_name])
print

print 'Fitting nonstationary model'
# Reset degree heterogeneity before the nonstationary fit.
alpha_zero(net)
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net, verbose = True)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print

print 'Fitting conditional model'
c_model = FixedMargins(StationaryLogistic())
for cov_name in cov_names:
    c_model.base_model.beta[cov_name] = None
c_model.base_model.fit_conditional(net, verbose = True)
print 'NLL: %.2f' % c_model.nll(net)
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)
    # Covariate is a similarity indicator on node-level draws; f_x is
    # consumed immediately, so the closure over x_node is safe.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']
'sub_sizes': np.floor(np.logspace(1.0, 2.1, 20)), 'verbose': True, 'plot_mse': True, 'plot_network': False, 'plot_fit_info': True } # Set random seed for reproducible output np.random.seed(137) # Initialize full network net = Network(params['N']) alpha_zero(net) # Generate covariates and associated coefficients data_model = NonstationaryLogistic() for b in range(params['B']): name = 'x_%d' % b if name in params['theta_fixed']: data_model.beta[name] = params['theta_fixed'][name] else: data_model.beta[name] = np.random.normal(0, params['theta_sd']) if params['cov_unif_sd'] > 0.0: c = np.sqrt(12) / 2 def f_x(i_1, i_2): return np.random.uniform(-c * params['cov_unif_sd'], c * params['cov_unif_sd']) elif params['cov_norm_sd'] > 0.0: def f_x(i_1, i_2):
init_latex_rendering() # Parameters N = 20 G = 30 alpha_sd = 2.0 theta_true = {'x_1': 2.0, 'x_2': -1.0} target_degree = 2 # Setup network net = Network(N) alpha_norm(net, alpha_sd) # Setup data model and network covariates data_model = NonstationaryLogistic() covariates = [] for name in theta_true: covariates.append(name) data_model.beta[name] = theta_true[name] def f_x(i_1, i_2): return np.random.normal(0, 1.0) net.new_edge_covariate(name).from_binary_function_ind(f_x) # Instantiate network according to data model data_model.match_kappa(net, ('row_sum', target_degree)) net.generate(data_model) net.show_heatmap(order_by_row='alpha_out')
def do_experiment(params):
    # Run one full sampling/fitting experiment over subnetwork sizes.
    # Generates (or loads) data from a NonstationaryLogistic model, fits the
    # requested model by the requested method on sampled subnetworks, records
    # per-covariate estimates and diagnostics, and returns (results, naming).
    if params['dump_fits'] and params['load_fits']:
        print 'Warning: simultaneously dumping and loading is a bad idea.'
    if params['dump_fits']:
        fits = []
    if params['load_fits']:
        with open(params['load_fits'], 'r') as fits_file:
            loaded_params_pick, loaded_fits = json.load(fits_file)
        loaded_params = dict([(k,unpick(v)) for (k,v) in loaded_params_pick])
        # Compare on parameters that control data generation and inference
        run_params = ['N', 'B', 'theta_sd', 'theta_fixed', 'alpha_unif_sd',
                      'alpha_norm_sd', 'alpha_gamma_sd', 'cov_unif_sd',
                      'cov_norm_sd', 'cov_disc_sd', 'kappa_target',
                      'pre_offset', 'post_fit', 'fit_nonstationary',
                      'fit_method', 'num_reps', 'is_T', 'sampling',
                      'sub_sizes_r', 'sub_sizes_c', 'random_seed']
        for p in run_params:
            if not np.all(loaded_params[p] == params[p]):
                print 'Warning: load mismatch on', p

    # Set random seed for reproducible output
    seed = Seed(params['random_seed'])

    # Initialize full network
    arr = Network(params['N'])

    # Generate node-level propensities to extend and receive edges
    if params['alpha_norm_sd'] > 0.0:
        alpha_norm(arr, params['alpha_norm_sd'])
    elif params['alpha_unif_sd'] > 0.0:
        alpha_unif(arr, params['alpha_unif_sd'])
    elif params['alpha_gamma_sd'] > 0.0:
        # Choosing location somewhat arbitrarily to give unit skewness
        alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
    else:
        alpha_zero(arr)

    # Generate covariates and associated coefficients
    data_model = NonstationaryLogistic()
    covariates = []
    for b in range(params['B']):
        name = 'x_%d' % b
        covariates.append(name)
        if name in params['theta_fixed']:
            data_model.beta[name] = params['theta_fixed'][name]
        else:
            data_model.beta[name] = np.random.normal(0, params['theta_sd'])
        # Choose the dyad-covariate distribution; exactly one must be set.
        if params['cov_unif_sd'] > 0.0:
            c = np.sqrt(12) / 2
            def f_x(i_1, i_2):
                return np.random.uniform(-c * params['cov_unif_sd'],
                                         c * params['cov_unif_sd'])
        elif params['cov_norm_sd'] > 0.0:
            def f_x(i_1, i_2):
                return np.random.normal(0, params['cov_norm_sd'])
        elif params['cov_disc_sd'] > 0.0:
            def f_x(i_1, i_2):
                return (params['cov_disc_sd'] *
                        (np.sign(np.random.random() - 0.5)))
        else:
            print 'Error: no covariate distribution specified.'
            sys.exit()
        arr.new_edge_covariate(name).from_binary_function_ind(f_x)

    # Generate large network, if necessary
    if not params['sampling'] == 'new':
        data_model.match_kappa(arr, params['kappa_target'])
        arr.generate(data_model)

    if params['fit_nonstationary']:
        fit_model = NonstationaryLogistic()
    else:
        fit_model = StationaryLogistic()
    for c in covariates:
        fit_model.beta[c] = None

    # Set up recording of results from experiment
    results = Results(params['sub_sizes_r'], params['sub_sizes_c'],
                      params['num_reps'],
                      interactive = params['interactive'])
    add_array_stats(results)
    if params['plot_sig']:
        from scipy.stats import chi2
        # Likelihood-ratio critical value on the log-likelihood scale.
        crit = lambda dof: -0.5 * chi2.ppf(0.95, dof)

        # Significance statistics under the unconditional MLE.
        umle_f = lambda n, f: f.nll(n, ignore_offset = True)
        umle_d = lambda n, d: d.nll(n, ignore_offset = True)
        umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset = True)
        results.new('UMLE F-N', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_n(n))
        results.new('UMLE F-D', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_d(n, d))

        # ... under the (approximate) conditional MLE.
        cmle_a_f = lambda n, f: acnll(n.as_dense(),
                                      np.exp(f.edge_probabilities(n)))
        cmle_a_d = lambda n, d: acnll(n.as_dense(),
                                      np.exp(d.edge_probabilities(n)))
        cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense()))
        results.new('CMLE-A F-N', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n))
        results.new('CMLE-A F-D', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d))

        # ... under the importance-sampled conditional MLE.
        cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate = True, T = 50)
        cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate = True, T = 50)
        cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional(
            n, evaluate = True, T = 50)
        results.new('CMLE-IS F-N', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n))
        results.new('CMLE-IS F-D', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d))

        # ... under the column-conditional MLE.
        c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate = True)
        c_cmle_d = lambda n, d: d.fit_c_conditional(n, evaluate = True)
        c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional(
            n, evaluate = True)
        results.new('C-CMLE F-N', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n))
        results.new('C-CMLE F-D', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d))

        results.new('UMLE sig.', 'dof',
                    lambda M, N, B: crit((M - 1) + (N - 1) + 1 + B))
        results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B))
        results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B))
    if params['sampling'] == 'new':
        results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa)
    def true_est_theta_c(c):
        return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])
    for c in covariates:
        # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
        f_true, f_est = true_est_theta_c(c)
        results.new('True theta_{%s}' % c, 'm', f_true)
        results.new('Est. theta_{%s}' % c, 'm', f_est)
    if params['pre_offset'] or params['post_fit']:
        results.new('# Active', 'n',
                    lambda n: np.isfinite(n.offset.matrix()).sum())
    else:
        results.new('# Active', 'n', lambda n: n.M * n.N)
    if params['fisher_information']:
        def info_theta_c(c):
            def f_info_theta_c(d, f):
                return d.I_inv['theta_{%s}' % c]
            return f_info_theta_c
        for c in covariates:
            results.new('Info theta_{%s}' % c, 'm', info_theta_c(c))
    if params['baseline']:
        def rel_mse_p_ij(n, d, f):
            P = d.edge_probabilities(n)
            return rel_mse(f.edge_probabilities(n), f.baseline(n), P)
        results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij)
        if not (params['pre_offset'] or params['post_fit']):
            def rel_mse_logit_p_ij(n, d, f):
                logit_P = d.edge_probabilities(n, logit = True)
                logit_Q = f.baseline_logit(n)
                return rel_mse(f.edge_probabilities(n, logit = True),
                               logit_Q, logit_P)
            results.new('Rel. MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij)
    if params['fit_method'] in ['convex_opt', 'conditional', 'c_conditional',
                                'irls', 'conditional_is']:
        results.new('Wall time (sec.)', 'm',
                    lambda d, f: f.fit_info['wall_time'])
    if params['fit_method'] in ['convex_opt', 'conditional',
                                'conditional_is']:
        # Total solver effort across all recorded evaluation counters.
        def work(f):
            w = 0
            for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']:
                if work_type in f.fit_info:
                    w += f.fit_info[work_type]
            return w
        results.new('Work', 'm', lambda d, f: work(f))
        results.new('||ET_final - T||_2', 'm',
                    lambda d, f: l2(f.fit_info['grad_nll_final']))

    for sub_size in zip(results.M_sizes, results.N_sizes):
        print 'subnetwork size =', sub_size

        if params['sampling'] == 'new':
            gen = RandomSubnetworks(arr, sub_size)
        else:
            gen = RandomSubnetworks(arr, sub_size,
                                    method = params['sampling'])
        for rep in range(params['num_reps']):
            seed.next()
            sub = gen.sample()

            if params['fisher_information']:
                data_model.fisher_information(sub)

            # Under 'new' sampling, data is generated per-subnetwork.
            if params['sampling'] == 'new':
                data_model.match_kappa(sub, params['kappa_target'])
                sub.generate(data_model)

            if params['load_fits']:
                # Replay a previously dumped fit instead of refitting.
                fit, loaded_fits = loaded_fits[0], loaded_fits[1:]
                fit_model.beta = unpick(fit['theta'])
                if params['fix_broken_cmle_is']:
                    for b_n in fit_model.beta:
                        fit_model.beta[b_n] += 0.1474
                if 'alpha' in fit:
                    sub.row_covariates['alpha_out'] = unpick(fit['alpha'])
                if 'beta' in fit:
                    sub.col_covariates['alpha_in'] = unpick(fit['beta'])
                if 'kappa' in fit:
                    fit_model.kappa = fit['kappa']
                if 'offset' in fit:
                    sub.offset = unpick(fit['offset'])
                if 'fit_info' in fit:
                    fit_model.fit_info = unpick(fit['fit_info'])
            else:
                if params['pre_offset']:
                    sub.offset_extremes()

                # Dispatch on the configured fitting method.
                if params['fit_method'] == 'convex_opt':
                    fit_model.fit_convex_opt(sub, verbose = params['verbose'])
                elif params['fit_method'] == 'irls':
                    fit_model.fit_irls(sub, verbose = params['verbose'])
                elif params['fit_method'] == 'logistic':
                    fit_model.fit_logistic(sub)
                elif params['fit_method'] == 'logistic_l2':
                    fit_model.fit_logistic_l2(sub, prior_precision = 1.0)
                elif params['fit_method'] == 'conditional':
                    fit_model.fit_conditional(sub, verbose = params['verbose'])
                elif params['fit_method'] == 'conditional_is':
                    fit_model.fit_conditional(sub, T = params['is_T'],
                                              verbose = params['verbose'])
                elif params['fit_method'] == 'c_conditional':
                    fit_model.fit_c_conditional(sub,
                                                verbose = params['verbose'])
                elif params['fit_method'] == 'composite':
                    fit_model.fit_composite(sub, T = 100,
                                            verbose = params['verbose'])
                elif params['fit_method'] == 'brazzale':
                    fit_model.fit_brazzale(sub)
                elif params['fit_method'] == 'saddlepoint':
                    fit_model.fit_saddlepoint(sub)
                elif params['fit_method'] == 'none':
                    pass

                if params['post_fit']:
                    sub.offset_extremes()
                    fit_model.fit_convex_opt(sub, fix_beta = True)

                if params['dump_fits']:
                    fit = {}
                    fit['theta'] = pick(fit_model.beta)
                    if 'alpha_out' in sub.row_covariates:
                        fit['alpha'] = pick(sub.row_covariates['alpha_out'])
                    if 'alpha_in' in sub.row_covariates:
                        fit['beta'] = pick(sub.col_covariates['alpha_in'])
                    if not fit_model.kappa is None:
                        fit['kappa'] = fit_model.kappa
                    if not sub.offset is None:
                        sub.offset.dirty()
                        fit['offset'] = pick(sub.offset)
                    if not fit_model.fit_info is None:
                        fit['fit_info'] = pick(fit_model.fit_info)
                    fits.append(fit)

            # Optionally stop on an unusually good fit and dump it to .mat.
            if params['find_good'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err < params['find_good']:
                    print abs_err
                    sub.offset = None
                    fit_model.fit_conditional(sub, T = 1000, verbose = True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info
                    f = file('goodmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, { 'Y': Y, 'X': X })
                    sys.exit()
            # Likewise for an unusually bad fit.
            if params['find_bad'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err > params['find_bad']:
                    print abs_err
                    sub.offset = None
                    fit_model.fit_conditional(sub, T = 1000, verbose = True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info
                    f = file('badmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, { 'Y': Y, 'X': X })
                    sys.exit()

            results.record(sub_size, rep, sub, data_model, fit_model)
            if params['verbose']:
                print

    if params['dump_fits']:
        with open(params['dump_fits'], 'w') as outfile:
            json.dump(([(p, pick(params[p])) for p in params], fits), outfile)

    # Compute beta MSEs
    covariate_naming = []
    for c in covariates:
        mse_name = 'MSE(theta_{%s})' % c
        true_name = 'True theta_{%s}' % c
        est_name = 'Est. theta_{%s}' % c
        results.estimate_mse(mse_name, true_name, est_name)
        covariate_naming.append((c, mse_name, true_name, est_name))

    # Report parameters for the run
    print 'Parameters:'
    for field in params:
        print '%s: %s' % (field, str(params[field]))

    # Should not vary between runs with the same seed and same number
    # of arrays tested
    seed.final()

    results.summary()

    return results, covariate_naming
nx.draw(graph, pos, node_size=10, with_labels=False)

# Fit and report the three model variants in turn.
print 'Fitting stationary model'
s_model = StationaryLogistic()
for cov_name in cov_names:
    s_model.beta[cov_name] = None
s_model.fit(net, verbose=True)
print 'NLL: %.2f' % s_model.nll(net)
print 'kappa: %.2f' % s_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, s_model.beta[cov_name])
print

print 'Fitting nonstationary model'
# Reset degree heterogeneity before the nonstationary fit.
alpha_zero(net)
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net, verbose=True)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print

print 'Fitting conditional model'
c_model = StationaryLogistic()
for cov_name in cov_names:
    c_model.beta[cov_name] = None
c_model.fit_conditional(net, T=0, verbose=True)
print 'NLL: %.2f' % c_model.nll(net)
def do_experiment(params): if params['dump_fits'] and params['load_fits']: print 'Warning: simultaneously dumping and loading is a bad idea.' if params['dump_fits']: fits = [] if params['load_fits']: with open(params['load_fits'], 'r') as fits_file: loaded_params_pick, loaded_fits = json.load(fits_file) loaded_params = dict([(k, unpick(v)) for (k, v) in loaded_params_pick]) # Compare on parameters that control data generation and inference run_params = [ 'N', 'B', 'theta_sd', 'theta_fixed', 'alpha_unif_sd', 'alpha_norm_sd', 'alpha_gamma_sd', 'cov_unif_sd', 'cov_norm_sd', 'cov_disc_sd', 'kappa_target', 'pre_offset', 'post_fit', 'fit_nonstationary', 'fit_method', 'num_reps', 'is_T', 'sampling', 'sub_sizes_r', 'sub_sizes_c', 'random_seed' ] for p in run_params: if not np.all(loaded_params[p] == params[p]): print 'Warning: load mismatch on', p # Set random seed for reproducible output seed = Seed(params['random_seed']) # Initialize full network arr = Network(params['N']) # Generate node-level propensities to extend and receive edges if params['alpha_norm_sd'] > 0.0: alpha_norm(arr, params['alpha_norm_sd']) elif params['alpha_unif_sd'] > 0.0: alpha_unif(arr, params['alpha_unif_sd']) elif params['alpha_gamma_sd'] > 0.0: # Choosing location somewhat arbitrarily to give unit skewness alpha_gamma(arr, 4.0, params['alpha_gamma_sd']) else: alpha_zero(arr) # Generate covariates and associated coefficients data_model = NonstationaryLogistic() covariates = [] for b in range(params['B']): name = 'x_%d' % b covariates.append(name) if name in params['theta_fixed']: data_model.beta[name] = params['theta_fixed'][name] else: data_model.beta[name] = np.random.normal(0, params['theta_sd']) if params['cov_unif_sd'] > 0.0: c = np.sqrt(12) / 2 def f_x(i_1, i_2): return np.random.uniform(-c * params['cov_unif_sd'], c * params['cov_unif_sd']) elif params['cov_norm_sd'] > 0.0: def f_x(i_1, i_2): return np.random.normal(0, params['cov_norm_sd']) elif params['cov_disc_sd'] > 0.0: def f_x(i_1, 
i_2): return (params['cov_disc_sd'] * (np.sign(np.random.random() - 0.5))) else: print 'Error: no covariate distribution specified.' sys.exit() arr.new_edge_covariate(name).from_binary_function_ind(f_x) # Generate large network, if necessary if not params['sampling'] == 'new': data_model.match_kappa(arr, params['kappa_target']) arr.generate(data_model) if params['fit_nonstationary']: fit_model = NonstationaryLogistic() else: fit_model = StationaryLogistic() for c in covariates: fit_model.beta[c] = None # Set up recording of results from experiment results = Results(params['sub_sizes_r'], params['sub_sizes_c'], params['num_reps'], interactive=params['interactive']) add_array_stats(results) if params['plot_sig']: from scipy.stats import chi2 crit = lambda dof: -0.5 * chi2.ppf(0.95, dof) umle_f = lambda n, f: f.nll(n, ignore_offset=True) umle_d = lambda n, d: d.nll(n, ignore_offset=True) umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset=True) results.new('UMLE F-N', 'nm', lambda n, d, f: umle_f(n, f) - umle_n(n)) results.new('UMLE F-D', 'nm', lambda n, d, f: umle_f(n, f) - umle_d(n, d)) cmle_a_f = lambda n, f: acnll(n.as_dense(), np.exp(f.edge_probabilities(n))) cmle_a_d = lambda n, d: acnll(n.as_dense(), np.exp(d.edge_probabilities(n))) cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense())) results.new('CMLE-A F-N', 'nm', lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n)) results.new('CMLE-A F-D', 'nm', lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d)) cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate=True, T=50) cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate=True, T=50) cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional( n, evaluate=True, T=50) results.new('CMLE-IS F-N', 'nm', lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n)) results.new('CMLE-IS F-D', 'nm', lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d)) c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate=True) c_cmle_d = lambda n, d: d.fit_c_conditional(n, 
evaluate=True) c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional( n, evaluate=True) results.new('C-CMLE F-N', 'nm', lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n)) results.new('C-CMLE F-D', 'nm', lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d)) results.new('UMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + (N - 1) + 1 + B)) results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B)) results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B)) if params['sampling'] == 'new': results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa) def true_est_theta_c(c): return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c]) for c in covariates: # Need to do this hackily to avoid for-loop/lambda-binding weirdness. f_true, f_est = true_est_theta_c(c) results.new('True theta_{%s}' % c, 'm', f_true) results.new('Est. theta_{%s}' % c, 'm', f_est) if params['pre_offset'] or params['post_fit']: results.new('# Active', 'n', lambda n: np.isfinite(n.offset.matrix()).sum()) else: results.new('# Active', 'n', lambda n: n.M * n.N) if params['fisher_information']: def info_theta_c(c): def f_info_theta_c(d, f): return d.I_inv['theta_{%s}' % c] return f_info_theta_c for c in covariates: results.new('Info theta_{%s}' % c, 'm', info_theta_c(c)) if params['baseline']: def rel_mse_p_ij(n, d, f): P = d.edge_probabilities(n) return rel_mse(f.edge_probabilities(n), f.baseline(n), P) results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij) if not (params['pre_offset'] or params['post_fit']): def rel_mse_logit_p_ij(n, d, f): logit_P = d.edge_probabilities(n, logit=True) logit_Q = f.baseline_logit(n) return rel_mse(f.edge_probabilities(n, logit=True), logit_Q, logit_P) results.new('Rel. 
MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij) if params['fit_method'] in [ 'convex_opt', 'conditional', 'c_conditional', 'irls', 'conditional_is' ]: results.new('Wall time (sec.)', 'm', lambda d, f: f.fit_info['wall_time']) if params['fit_method'] in ['convex_opt', 'conditional', 'conditional_is']: def work(f): w = 0 for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']: if work_type in f.fit_info: w += f.fit_info[work_type] return w results.new('Work', 'm', lambda d, f: work(f)) results.new('||ET_final - T||_2', 'm', lambda d, f: l2(f.fit_info['grad_nll_final'])) for sub_size in zip(results.M_sizes, results.N_sizes): print 'subnetwork size =', sub_size if params['sampling'] == 'new': gen = RandomSubnetworks(arr, sub_size) else: gen = RandomSubnetworks(arr, sub_size, method=params['sampling']) for rep in range(params['num_reps']): seed.next() sub = gen.sample() if params['fisher_information']: data_model.fisher_information(sub) if params['sampling'] == 'new': data_model.match_kappa(sub, params['kappa_target']) sub.generate(data_model) if params['load_fits']: fit, loaded_fits = loaded_fits[0], loaded_fits[1:] fit_model.beta = unpick(fit['theta']) if params['fix_broken_cmle_is']: for b_n in fit_model.beta: fit_model.beta[b_n] += 0.1474 if 'alpha' in fit: sub.row_covariates['alpha_out'] = unpick(fit['alpha']) if 'beta' in fit: sub.col_covariates['alpha_in'] = unpick(fit['beta']) if 'kappa' in fit: fit_model.kappa = fit['kappa'] if 'offset' in fit: sub.offset = unpick(fit['offset']) if 'fit_info' in fit: fit_model.fit_info = unpick(fit['fit_info']) else: if params['pre_offset']: sub.offset_extremes() if params['fit_method'] == 'convex_opt': fit_model.fit_convex_opt(sub, verbose=params['verbose']) elif params['fit_method'] == 'irls': fit_model.fit_irls(sub, verbose=params['verbose']) elif params['fit_method'] == 'logistic': fit_model.fit_logistic(sub) elif params['fit_method'] == 'logistic_l2': fit_model.fit_logistic_l2(sub, prior_precision=1.0) elif 
params['fit_method'] == 'conditional': fit_model.fit_conditional(sub, verbose=params['verbose']) elif params['fit_method'] == 'conditional_is': fit_model.fit_conditional(sub, T=params['is_T'], verbose=params['verbose']) elif params['fit_method'] == 'c_conditional': fit_model.fit_c_conditional(sub, verbose=params['verbose']) elif params['fit_method'] == 'composite': fit_model.fit_composite(sub, T=100, verbose=params['verbose']) elif params['fit_method'] == 'brazzale': fit_model.fit_brazzale(sub) elif params['fit_method'] == 'saddlepoint': fit_model.fit_saddlepoint(sub) elif params['fit_method'] == 'none': pass if params['post_fit']: sub.offset_extremes() fit_model.fit_convex_opt(sub, fix_beta=True) if params['dump_fits']: fit = {} fit['theta'] = pick(fit_model.beta) if 'alpha_out' in sub.row_covariates: fit['alpha'] = pick(sub.row_covariates['alpha_out']) if 'alpha_in' in sub.row_covariates: fit['beta'] = pick(sub.col_covariates['alpha_in']) if not fit_model.kappa is None: fit['kappa'] = fit_model.kappa if not sub.offset is None: sub.offset.dirty() fit['offset'] = pick(sub.offset) if not fit_model.fit_info is None: fit['fit_info'] = pick(fit_model.fit_info) fits.append(fit) if params['find_good'] > 0: abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0']) if abs_err < params['find_good']: print abs_err sub.offset = None fit_model.fit_conditional(sub, T=1000, verbose=True) print fit_model.beta['x_0'] print fit_model.fit_info f = file('goodmat.mat', 'wb') import scipy.io Y = np.array(sub.as_dense(), dtype=np.float) X = sub.edge_covariates['x_0'].matrix() scipy.io.savemat(f, {'Y': Y, 'X': X}) sys.exit() if params['find_bad'] > 0: abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0']) if abs_err > params['find_bad']: print abs_err sub.offset = None fit_model.fit_conditional(sub, T=1000, verbose=True) print fit_model.beta['x_0'] print fit_model.fit_info f = file('badmat.mat', 'wb') import scipy.io Y = np.array(sub.as_dense(), dtype=np.float) X = 
sub.edge_covariates['x_0'].matrix() scipy.io.savemat(f, {'Y': Y, 'X': X}) sys.exit() results.record(sub_size, rep, sub, data_model, fit_model) if params['verbose']: print if params['dump_fits']: with open(params['dump_fits'], 'w') as outfile: json.dump(([(p, pick(params[p])) for p in params], fits), outfile) # Compute beta MSEs covariate_naming = [] for c in covariates: mse_name = 'MSE(theta_{%s})' % c true_name = 'True theta_{%s}' % c est_name = 'Est. theta_{%s}' % c results.estimate_mse(mse_name, true_name, est_name) covariate_naming.append((c, mse_name, true_name, est_name)) # Report parameters for the run print 'Parameters:' for field in params: print '%s: %s' % (field, str(params[field])) # Should not vary between runs with the same seed and same number # of arrays tested seed.final() results.summary() return results, covariate_naming
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(5)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # Each covariate is an indicator that two nodes have nearby latent
    # positions (threshold 0.3 on |x_i - x_j|).
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    # NOTE(review): f_x closes over this iteration's x_node; it is consumed
    # immediately here, so the rebinding next iteration is harmless provided
    # from_binary_function_ind evaluates eagerly — confirm.
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)

net.generate(data_model)
net.offset_extremes()
net.show()

print 'True theta_0: %.2f' % data_model.beta['x_0']
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 100
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -1.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # The covariate is an indicator that two nodes have nearby latent
    # positions (threshold 0.6 on |x_i - x_j|).
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.6
    # NOTE(review): f_x closes over this iteration's x_node; safe here only
    # if from_binary_function_ind evaluates eagerly — confirm.
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)

net.generate(data_model)
net.offset_extremes()
net.show()

print 'True theta_0: %.2f' % data_model.beta['x_0']
# Build block-membership indicator covariates from the 'value' node
# covariate: 'll' (both left), 'rr' (both right), 'lr' (left-to-right).
for v_1, v_2, name in [(0, 0, 'll'), (1, 1, 'rr'), (0, 1, 'lr')]:
    def f_x(i_1, i_2):
        return ((net.node_covariates['value'][i_1] == v_1) and
                (net.node_covariates['value'][i_2] == v_2))
    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Continuous edge covariate, uniform on [-sqrt(3), sqrt(3)] (unit variance).
def f_x(i_1, i_2):
    return np.random.uniform(-np.sqrt(3), np.sqrt(3))
net.new_edge_covariate('x').from_binary_function_ind(f_x)

# Data model: coefficient theta on 'x' plus fixed block effects.
data_model = NonstationaryLogistic()
data_model.beta['x'] = theta
for name, block_theta in [('ll', 4.0), ('rr', 3.0), ('lr', -2.0)]:
    data_model.beta[name] = block_theta
alpha_norm(net, alpha_sd)
data_model.match_kappa(net, ('row_sum', 2))
net.generate(data_model)
net.show_heatmap()
net.offset_extremes()

# Fitting model: 2-block Blockmodel wrapping a nonstationary base model
# with a free coefficient on 'x'.
fit_base_model = NonstationaryLogistic()
fit_base_model.beta['x'] = None
fit_model = Blockmodel(fit_base_model, 2)
#fit_model.base_model.fit = fit_model.base_model.fit_conditional

# Initialize block assignments
# Assign the first half of the nodes to block 0 and the second half to
# block 1. NOTE(review): N/2 relies on Python 2 integer division.
net.new_node_covariate('value').from_pairs(net.names,
                                           [0]*(N/2) + [1]*(N/2))

# Build block-membership indicator covariates from the 'value' node
# covariate: 'll' (both left), 'rr' (both right), 'lr' (left-to-right).
for v_1, v_2, name in [(0, 0, 'll'), (1, 1, 'rr'), (0, 1, 'lr')]:
    def f_x(i_1, i_2):
        return ((net.node_covariates['value'][i_1] == v_1) and
                (net.node_covariates['value'][i_2] == v_2))
    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Continuous edge covariate, uniform on [-sqrt(3), sqrt(3)] (unit variance).
def f_x(i_1, i_2):
    return np.random.uniform(-np.sqrt(3), np.sqrt(3))
net.new_edge_covariate('x').from_binary_function_ind(f_x)

# Data model: coefficient theta on 'x' plus fixed block effects.
data_model = NonstationaryLogistic()
data_model.beta['x'] = theta
for name, block_theta in [('ll', 4.0), ('rr', 3.0), ('lr', -2.0)]:
    data_model.beta[name] = block_theta
alpha_norm(net, alpha_sd)
data_model.match_kappa(net, ('row_sum', 2))
net.generate(data_model)
net.show_heatmap()
net.offset_extremes()

# Fitting model: 2-block Blockmodel wrapping a nonstationary base model
# with a free coefficient on 'x'.
fit_base_model = NonstationaryLogistic()
fit_base_model.beta['x'] = None
fit_model = Blockmodel(fit_base_model, 2)
#fit_model.base_model.fit = fit_model.base_model.fit_conditional
# Split nodes into high/low degree halves at the median observed degree.
med_degree = np.median(degree)
net.new_node_covariate('low_degree').from_pairs(net.names,
                                                degree < med_degree)

# Indicator covariates for the degree-block of each ordered node pair.
for v_1, v_2, name in [(0, 0, 'high_to_high'),
                       (1, 1, 'low_to_low'),
                       (0, 1, 'high_to_low')]:
    covariates.append(name)
    def f_x(i_1, i_2):
        return ((net.node_covariates['low_degree'][i_1] == v_1) and
                (net.node_covariates['low_degree'][i_2] == v_2))
    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Initialize fitting model
fit_model = StationaryLogistic()
n_fit_model = NonstationaryLogistic()
for c in covariates:
    # None marks each coefficient as free (to be estimated).
    fit_model.beta[c] = None
    n_fit_model.beta[c] = None

# Set up recording of results from experiment
results = Results(params['sub_sizes'], params['num_reps'], 'Stationary fit')
add_network_stats(results)

# Factory avoids the classic for-loop/lambda late-binding pitfall.
def est_theta_c(c):
    return lambda d, f: f.beta[c]
for c in covariates:
    f_est = est_theta_c(c)