예제 #1
0
def ci_umle_wald(X, v, alpha_level):
    """Wald-type CI for theta_0 from the unrestricted MLE (inverse-information variant).

    Builds an array from (X, v), offsets extreme substructure, applies
    alpha_zero, fits a NonstationaryLogistic model with a free 'x_0'
    coefficient, and reads the stored 'wald_inverse' interval via safe_ci
    (None when unavailable).
    """
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)

    model = NonstationaryLogistic()
    model.beta['x_0'] = None
    model.confidence_wald(data, strict=False, alpha_level=alpha_level)

    return safe_ci(model, 'x_0', 'wald_inverse')
예제 #2
0
def ci_umle_wald(X, v, alpha_level):
    """Standard Wald CI for theta_0 from the unrestricted MLE.

    Same pipeline as the inverse-information variant, but uses the default
    confidence_wald settings and reads the 'wald' entry via safe_ci.
    """
    arr_data = array_from_data(X, [v])
    arr_data.offset_extremes()
    alpha_zero(arr_data)

    wald_model = NonstationaryLogistic()
    wald_model.beta["x_0"] = None
    wald_model.confidence_wald(arr_data, alpha_level=alpha_level)

    return safe_ci(wald_model, "x_0", "wald")
예제 #3
0
def ci_umle_boot(X, v, alpha_level):
    """Pivotal bootstrap CI for theta_0 from the unrestricted MLE."""
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)

    boot_model = NonstationaryLogistic()
    boot_model.beta['x_0'] = None
    boot_model.confidence_boot(data, alpha_level=alpha_level)

    # Unlike the Wald variants, this indexes the stored interval directly,
    # so a missing entry raises KeyError instead of returning None.
    return boot_model.conf['x_0']['pivotal']
예제 #4
0
def ci_brazzale(X, v, alpha_level):
    """CI for theta_0 via the Brazzale-style fit of the nonstationary model."""
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)

    hoa_model = NonstationaryLogistic()
    hoa_model.beta['x_0'] = None
    hoa_model.fit_brazzale(data, 'x_0', alpha_level=alpha_level)

    return safe_ci(hoa_model, 'x_0', 'brazzale')
예제 #5
0
def ci_umle(X, v, theta_grid, alpha_level):
    """Likelihood-inversion CI for theta_0 from the unrestricted MLE.

    Profiles the log-likelihood over theta_grid (fitting with the
    coefficient fixed at each grid point) and inverts a chi^2(1) test at
    level alpha_level.  When the module-level params['plot'] flag is set,
    also records plotting/coverage data in module-level state
    (ax_umle, umle_coverage_data).
    """
    data = array_from_data(X, [v])
    data.offset_extremes()
    alpha_zero(data)

    model = NonstationaryLogistic()

    # Profile log-likelihood over the candidate grid.
    loglik = np.empty_like(theta_grid)
    for idx, theta_val in enumerate(theta_grid):
        model.beta['x_0'] = theta_val
        model.fit(data, fix_beta=True)
        loglik[idx] = -model.nll(data)

    crit = -0.5 * chi2.ppf(1 - alpha_level, 1)
    shifted = loglik - loglik.max()
    ci = invert_test(theta_grid, shifted, crit)
    if params['plot']:
        plot_statistics(ax_umle, theta_grid, shifted, crit)
        umle_coverage_data['cis'].append(ci)
        umle_coverage_data['theta_grid'] = theta_grid
        umle_coverage_data['crit'] = crit
    return ci
# Upper bound and resolution of the grid of candidate theta values used by
# the likelihood-inversion confidence-interval routines.
theta_grid_max = 3.0
theta_grid_G = 121


def cond_a_nll(X, w):
    """Conditional negative log-likelihood of X under weights w.

    Thin wrapper around cond_a_nll_b with wopt-variance sorting enabled.
    """
    result = cond_a_nll_b(X, w, sort_by_wopt_var=True)
    return result

def cond_a_sample(r, c, w, T = 0):
    """Sample an array with row margins r and column margins c under weights w.

    Thin wrapper around cond_a_sample_b with wopt-variance sorting enabled.
    """
    drawn = cond_a_sample_b(r, c, w, T, sort_by_wopt_var=True)
    return drawn

# Rejection loop: simulate M x N arrays from a NonstationaryLogistic data
# model (alpha_norm heterogeneity, one Gaussian edge covariate 'x') until a
# conditional fit misses the true coefficient by more than min_error, then
# print the offending estimate and stop.
# NOTE(review): M, N, theta, kappa_target, T_fit, min_error and
# theta_grid_min are assumed defined earlier in the file -- confirm.
while True:
    a = Array(M, N)
    alpha_norm(a, 1.0)
    a.new_edge_covariate('x')[:,:] = np.random.normal(0, 1, (M, N))

    d = NonstationaryLogistic()
    d.beta['x'] = theta

    # Calibrate the model so the generated array matches kappa_target.
    d.match_kappa(a, kappa_target)
    a.generate(d)

    f = NonstationaryLogistic()
    f.beta['x'] = None

    f.fit_conditional(a, T = T_fit, verbose = True)
    abs_err = abs(f.beta['x'] - d.beta['x'])
    if abs_err > min_error:
        print f.beta['x']
        break

theta_vec = np.linspace(theta_grid_min, theta_grid_max, theta_grid_G)
           'cov_structure': 'multimodal_4_cycles',
           'cov_mult': 2.0,
           'num_reps': 10,
           'coverage_increments': [0.01]*10 + [0.1]*10 + [0.2]*10 + [0.5]*10,
           'arb_init': False }


# Set random seed for reproducible output
np.random.seed(137)

# Initialize full network
net = Network(params['N'])
alpha_zero(net)  # no degree heterogeneity in this configuration

# Generate covariates and associated coefficients
data_model = NonstationaryLogistic()
covariates = []
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    if name in params['theta_fixed']:
        data_model.beta[name] = params['theta_fixed'][name]
    else:
        data_model.beta[name] = np.random.normal(0, params['theta_sd'])

    if params['cov_structure'] == 'none':
        def f_x(i_1, i_2):
            return np.random.uniform(-np.sqrt(3), np.sqrt(3))
    elif params['cov_structure'] == 'unimodal':
        def f_x(i_1, i_2):
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(5)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # Similarity covariate: dyad (i_1, i_2) is flagged when the nodes'
    # latent positions differ by less than 0.3.  f_x closes over x_node;
    # this late binding is safe because f_x is consumed immediately below.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']

# Initialize the fit model; specify which covariates it should have terms for
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network (smaller, denser variant: N = 100, kappa = -1.0)
N = 100
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -1.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # Similarity covariate with a wider 0.6 threshold than the other
    # variants of this script elsewhere in the file.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.6
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']

# Initialize the fit model; specify which covariates it should have terms for
예제 #10
0

def cond_a_nll(X, w):
    """Conditional negative log-likelihood of X given w (wopt-variance sort on)."""
    nll_value = cond_a_nll_b(X, w, sort_by_wopt_var=True)
    return nll_value


def cond_a_sample(r, c, w, T=0):
    """Draw a margin-conditioned sample (row margins r, column margins c)."""
    sample = cond_a_sample_b(r, c, w, T, sort_by_wopt_var=True)
    return sample


# Rejection loop (PEP8-formatted duplicate of the earlier fragment):
# simulate arrays until a conditional fit misses the true coefficient by
# more than min_error, then print the offending estimate.
# NOTE(review): M, N, theta, kappa_target, T_fit, min_error and
# theta_grid_min are assumed defined earlier in the file -- confirm.
while True:
    a = Array(M, N)
    alpha_norm(a, 1.0)
    a.new_edge_covariate('x')[:, :] = np.random.normal(0, 1, (M, N))

    d = NonstationaryLogistic()
    d.beta['x'] = theta

    d.match_kappa(a, kappa_target)
    a.generate(d)

    f = NonstationaryLogistic()
    f.beta['x'] = None

    f.fit_conditional(a, T=T_fit, verbose=True)
    abs_err = abs(f.beta['x'] - d.beta['x'])
    if abs_err > min_error:
        print f.beta['x']
        break

theta_vec = np.linspace(theta_grid_min, theta_grid_max, theta_grid_G)
# Experiment setup: small network with normal degree heterogeneity, two
# fixed-coefficient noise covariates, instantiated to a target row sum.
init_latex_rendering()

# Parameters
N = 25
G = 20
alpha_sd = 2.0
theta_true = { 'x_1': 2.0, 'x_2': -1.0 }
target_degree = 2

# Setup network
net = Network(N)
alpha_norm(net, alpha_sd)

# Setup data model and network covariates
data_model = NonstationaryLogistic()
covariates = []
for name in theta_true:
    covariates.append(name)

    data_model.beta[name] = theta_true[name]

    # Pure-noise covariate: each dyad gets an independent N(0, 1) draw.
    def f_x(i_1, i_2):
        return np.random.normal(0, 1.0)
    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Instantiate network according to data model
data_model.match_kappa(net, ('row_sum', target_degree))
net.generate(data_model)
#net.show_heatmap(order_by_row = 'alpha_out')
#net.show_heatmap(order_by_col = 'alpha_in')
예제 #12
0
    print '%s: %.2f' % (cov_name, c_model.base_model.beta[cov_name])
print
# Sample replicates from the conditional model and compute bootstrap, Wald,
# and constrained (test = 'score' / 'lr') confidence intervals; then repeat
# the sampling/CI procedure for a freshly fit nonstationary model.
for rep in range(params['n_samples']):
    c_samples[rep,:,:] = c_model.generate(net, coverage = 0.1)
c_model.confidence_boot(net, n_bootstrap = params['n_bootstrap'])
c_model.confidence_wald(net)
for cov_name in cov_names:
    c_model.confidence_cons(net, cov_name, L = 121, test = 'score')
    c_model.confidence_cons(net, cov_name, L = 121, test = 'lr')
display_cis(c_model)

# Offset extreme substructure only for Nonstationary model
net.offset_extremes()

print 'Fitting nonstationary model'
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print
for rep in range(params['n_samples']):
    ns_samples[rep,:,:] = ns_model.generate(net)
ns_model.confidence_boot(net, n_bootstrap = params['n_bootstrap'])
ns_model.confidence_wald(net)
display_cis(ns_model)

# Calculate sample means and variances
예제 #13
0
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # Similarity covariate built from per-node latent positions; f_x is
    # consumed immediately, so the closure over x_node is safe.
    x_node = normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']

# Initialize the fit model; specify which covariates it should have terms for
예제 #14
0
# Parameters for a CI-coverage experiment: N-node network, D Gaussian edge
# covariates, bootstrap and replication counts, nominal level alpha_level.
N = 50
D = 1
theta = 2.0
kappa_target = ('row_sum', 2)
alpha_sd = 2.0
n_rep = 100
n_boot = 10
alpha_level = 0.05

net = Network(N)
alpha_norm(net, alpha_sd)
for d in range(D):
    net.new_edge_covariate('x_%d' % d)[:,:] = np.random.normal(0, 1, (N, N))

data_model = NonstationaryLogistic()
for d in range(D):
    data_model.beta['x_%d' % d] = np.random.normal(0, 1)
# Overwrite x_0's random coefficient with the fixed true value theta.
data_model.beta['x_0'] = theta
data_model.match_kappa(net, kappa_target)

# Fit models share the same covariate terms; coefficients left free (None).
s_fit = StationaryLogistic()
ns_fit = NonstationaryLogistic()
for d in range(D):
    s_fit.beta['x_%d' % d] = None
    ns_fit.beta['x_%d' % d] = None

def safe_ci(model, name, method):
    """Look up a stored confidence interval, returning None when absent.

    Reads model.conf[name][method]; missing name or method yields None
    instead of a KeyError.
    """
    if name not in model.conf:
        return None
    intervals = model.conf[name]
    if method not in intervals:
        return None
    return intervals[method]
예제 #15
0
import numpy as np
import matplotlib.pyplot as plt

from Network import Network
from Models import StationaryLogistic, NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from Utility import draw_confidence

# Initialize full network (sub_N is the subnetwork size used later)
N = 300
sub_N = 100
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_1', 'x_2', 'x_3', 'x_4', 'x_5']
for covariate in covariates:
    data_model.beta[covariate] = np.random.normal(0, 1.0)

    # Similarity covariate from per-node latent positions; the closure
    # over x_node is consumed immediately below, so late binding is safe.
    x_node = np.random.normal(0, 1.0, N)
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3
    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
print 'True beta_1: %.2f' % data_model.beta['x_1']

# Initialize the fit model; specify which covariates it should have terms for
fit_model = StationaryLogistic()
for covariate in covariates:
예제 #16
0
        for cov_name in cov_names:
            fit_model.beta[cov_name] = None
    fit_model.fit(net, verbose=params['verbose'])
    print 'NLL: %.2f' % fit_model.nll(net)
    print 'kappa: %.2f' % fit_model.kappa
    if use_covs:
        for cov_name in cov_names:
            print '%s: %.2f' % (cov_name, fit_model.beta[cov_name])
    print '\n'


# Fit and report each model family in turn: intercept-only and
# covariate-bearing Stationary, then Nonstationary without/with covariates,
# optionally offsetting extreme substructure in between.
fit_and_summarize('Stationary', Stationary(), False)
fit_and_summarize('Stationary', StationaryLogistic(), True)
if params['offset_extremes']:
    print 'Detecting subnetworks associated with infinite parameter estimates.\n'
    net.offset_extremes()
    if params['plot']: net.show_offset('pub_date')
fit_and_summarize('Nonstationary', NonstationaryLogistic(), False)
fit_and_summarize('Nonstationary', NonstationaryLogistic(), True)

# Redisplay heatmap, ordered by estimated alphas from last fit, i.e.,
# NonstationaryLogistic with publication date difference covariates
# XX: Following plots are broken
#if params['plot']:
#    net.show_heatmap('alpha_out')
#    net.show_heatmap('alpha_in')

# Serialize the network to JSON; the with-block guarantees the file is
# closed even if dump_to_json or the write raises (the original open/
# write/close sequence leaked the handle on error).
with open('scratch.json', 'w') as outfile:
    outfile.write(dump_to_json(net))
    net.new_node_covariate('low_degree').from_pairs(net.names,
                                                    degree < med_degree)
    for v_1, v_2, name in [(0, 0, 'high_to_high'),
                           (1, 1, 'low_to_low'),
                           (0, 1, 'high_to_low')]:
        covariates.append(name)

        def f_x(i_1, i_2):
            return ((net.node_covariates['low_degree'][i_1] == v_1) and
                    (net.node_covariates['low_degree'][i_2] == v_2))

        net.new_edge_covariate(name).from_binary_function_ind(f_x)
    
# Initialize fitting model
fit_model = StationaryLogistic()
n_fit_model = NonstationaryLogistic()
for c in covariates:
    fit_model.beta[c] = None
    n_fit_model.beta[c] = None

# Set up recording of results from experiment
results = Results(params['sub_sizes'], params['num_reps'], 'Stationary fit')
add_network_stats(results)
# Factory avoids the classic late-binding-in-a-loop pitfall: each lambda
# captures its own c via the enclosing call frame.
def est_theta_c(c):
    return lambda d, f: f.beta[c]
for c in covariates:
    f_est = est_theta_c(c)
    results.new('%s' % c, 'm', f_est)

all_results = {}
if params['fit_stationary']:
# Build a StationaryLogistic data model with B threshold-similarity
# covariates, then configure the fit model and result recording.
data_model = StationaryLogistic()
covariates = []
data_model.beta = {}
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    data_model.beta[name] = np.random.normal(0, params['beta_sd'])

    # Dyad covariate flags node pairs whose latent positions are closer
    # than x_diff_cutoff; f_x is consumed immediately, so the closure over
    # x_node is safe.
    x_node = np.random.normal(0, 1, params['N'])
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < params['x_diff_cutoff']
    net.new_edge_covariate(name).from_binary_function_ind(f_x)

if params['fit_nonstationary']:
    fit_model = NonstationaryLogistic()
else:
    fit_model = StationaryLogistic()
for c in covariates:
    fit_model.beta[c] = None

# Set up recording of results from experiment
results = Results(params['sub_sizes'], params['sub_sizes'],
                  params['num_reps'])
add_array_stats(results)
def f_c(c):
    return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])
for c in covariates:
    # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
    f_true, f_estimated = f_c(c)
    results.new('True beta_{%s}' % c, 'm', f_true)
# Initialize full network
net = Network(params['N'])

# Generate node-level propensities to extend and receive edges.
# Exactly one heterogeneity scheme is chosen by the first positive SD.
if params['alpha_norm_sd'] > 0.0:
    alpha_norm(net, params['alpha_norm_sd'])
elif params['alpha_unif'] > 0.0:
    alpha_unif(net, params['alpha_unif'])
elif params['alpha_gamma_sd'] > 0.0:
    # Choosing location somewhat arbitrarily to give unit skewness
    alpha_gamma(net, 4.0, params['alpha_gamma_sd'])
else:
    alpha_zero(net)

# Generate covariates and associated coefficients
data_base_model = NonstationaryLogistic()
covariates = []
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    data_base_model.beta[name] = np.random.normal(0, params['beta_sd'])

    # Uniform(-sqrt(3), sqrt(3)) has mean 0 and unit variance.
    def f_x(i_1, i_2):
        return np.random.uniform(-np.sqrt(3), np.sqrt(3))
    net.new_edge_covariate(name).from_binary_function_ind(f_x)


# Initialize data (block)model from base model: draw class probabilities
# from a symmetric Dirichlet, then class labels z via one-hot multinomials.
class_probs = np.random.dirichlet(np.repeat(params['class_conc'], params['K']))
z = np.where(np.random.multinomial(1, class_probs, params['N']) == 1)[1]
# Report the stationary fit, draw replicate samples, and print pivotal
# bootstrap CIs; then reset alphas and repeat for a nonstationary fit.
print 'kappa: %.2f' % s_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, s_model.beta[cov_name])
print
for rep in range(params['n_samples']):
    s_samples[rep, :, :] = s_model.generate(net)
s_model.confidence(net, n_bootstrap=params['n_bootstrap'])
print 'Pivotal:'
for cov_name in cov_names:
    ci = s_model.conf[cov_name]['pivotal']
    print ' %s: (%.2f, %.2f)' % (cov_name, ci[0], ci[1])
print

print 'Fitting nonstationary model'
alpha_zero(net)
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print
for rep in range(params['n_samples']):
    ns_samples[rep, :, :] = ns_model.generate(net)
ns_model.confidence(net, n_bootstrap=params['n_bootstrap'])
print 'Pivotal:'
for cov_name in cov_names:
    ci = ns_model.conf[cov_name]['pivotal']
    print ' %s: (%.2f, %.2f)' % (cov_name, ci[0], ci[1])
예제 #21
0
# Populate the data model with B threshold-similarity covariates, choose
# the fit-model family, and set up result recording (PEP8 duplicate of an
# earlier fragment).
data_model.beta = {}
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    data_model.beta[name] = np.random.normal(0, params['beta_sd'])

    x_node = np.random.normal(0, 1, params['N'])

    # f_x closes over x_node; safe because it is consumed immediately.
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < params['x_diff_cutoff']

    net.new_edge_covariate(name).from_binary_function_ind(f_x)

if params['fit_nonstationary']:
    fit_model = NonstationaryLogistic()
else:
    fit_model = StationaryLogistic()
for c in covariates:
    fit_model.beta[c] = None

# Set up recording of results from experiment
results = Results(params['sub_sizes'], params['sub_sizes'], params['num_reps'])
add_array_stats(results)


# Factory avoids for-loop/lambda late-binding: each pair of lambdas
# captures its own c.
def f_c(c):
    return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])


for c in covariates:
# Initialize array
arr = Array(params['M'], params['N'])

# Generate node-level propensities to extend and receive edges.
# Exactly one heterogeneity scheme is chosen by the first positive SD.
if params['alpha_norm_sd'] > 0.0:
    alpha_norm(arr, params['alpha_norm_sd'])
elif params['alpha_unif_sd'] > 0.0:
    alpha_unif(arr, params['alpha_unif_sd'])
elif params['alpha_gamma_sd'] > 0.0:
    # Choosing location somewhat arbitrarily to give unit skewness
    alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
else:
    alpha_zero(arr)

# Generate covariates and associated coefficients
data_model = NonstationaryLogistic()
covariates = []
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    if name in params['beta_fixed']:
        data_model.beta[name] = params['beta_fixed'][name]
    else:
        data_model.beta[name] = np.random.normal(0, params['beta_sd'])

    # Uniform(-sqrt(3), sqrt(3)) has mean 0 and unit variance.
    def f_x(i_1, i_2):
        return np.random.uniform(-np.sqrt(3), np.sqrt(3))
    arr.new_edge_covariate(name).from_binary_function_ind(f_x)
data_model.match_kappa(arr, params['kappa_target'])
예제 #23
0
import numpy as np
import matplotlib.pyplot as plt

from Network import Network
from Models import NonstationaryLogistic
from Models import alpha_zero, alpha_norm, alpha_gamma, alpha_unif
from Experiment import RandomSubnetworks

# Parameters
N = 300
reps = 10
sub_sizes = range(10, 110, 10)
kappa_target = ('row_sum', 2)

net = Network(N)
model = NonstationaryLogistic()

# Result arrays: data_none for the homogeneous case, data_het indexed by
# (het type, het SD, subnetwork size, rep).
# NOTE(review): data_het is sized 3x3 while the loops below enumerate four
# het types / four SDs (with 'None'/0.0 combinations skipped) -- confirm
# the indexing stays in range.
num_sizes = len(sub_sizes)
data_none = np.empty((num_sizes, reps))
data_het = np.empty((3, 3, num_sizes, reps))
for i, degree_het in enumerate(['Normal', 'Gamma', 'Uniform', 'None']):
    if degree_het == 'None':
        alpha_zero(net)
    for j, het_sd in enumerate([1.0, 2.0, 3.0, 0.0]):
        if degree_het == 'None' and het_sd != 0.0: continue
        if degree_het != 'None' and het_sd == 0.0: continue

        if degree_het == 'Normal':
            alpha_norm(net, het_sd)
        if degree_het == 'Gamma':
            alpha_gamma(net, 4.0, het_sd)
nx.draw(graph, pos, node_size = 10, with_labels = False)

# Fit and report three models on the same network: stationary,
# nonstationary (after resetting alphas), and margin-conditioned.
print 'Fitting stationary model'
s_model = StationaryLogistic()
for cov_name in cov_names:
    s_model.beta[cov_name] = None
s_model.fit(net, verbose = True)
print 'NLL: %.2f' % s_model.nll(net)
print 'kappa: %.2f' % s_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, s_model.beta[cov_name])
print

print 'Fitting nonstationary model'
alpha_zero(net)
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net, verbose = True)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print

print 'Fitting conditional model'
c_model = FixedMargins(StationaryLogistic())
for cov_name in cov_names:
    c_model.base_model.beta[cov_name] = None
c_model.base_model.fit_conditional(net, verbose = True)
print 'NLL: %.2f' % c_model.nll(net)
예제 #25
0
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    x_node = normal(0, 1.0, N)

    # f_x closes over x_node; safe because it is consumed immediately.
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3

    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']
예제 #26
0
           'sub_sizes': np.floor(np.logspace(1.0, 2.1, 20)),
           'verbose': True,
           'plot_mse': True,
           'plot_network': False,
           'plot_fit_info': True }


# Set random seed for reproducible output
np.random.seed(137)

# Initialize full network
net = Network(params['N'])
alpha_zero(net)  # no degree heterogeneity in this configuration

# Generate covariates and associated coefficients
data_model = NonstationaryLogistic()
for b in range(params['B']):
    name = 'x_%d' % b

    if name in params['theta_fixed']:
        data_model.beta[name] = params['theta_fixed'][name]
    else:
        data_model.beta[name] = np.random.normal(0, params['theta_sd'])

    if params['cov_unif_sd'] > 0.0:
        c = np.sqrt(12) / 2
        def f_x(i_1, i_2):
            return np.random.uniform(-c * params['cov_unif_sd'],
                                     c * params['cov_unif_sd'])
    elif params['cov_norm_sd'] > 0.0:
        def f_x(i_1, i_2):
예제 #27
0
# Experiment setup (variant of the earlier fragment: N = 20, G = 30, and
# the heatmap display is active rather than commented out).
init_latex_rendering()

# Parameters
N = 20
G = 30
alpha_sd = 2.0
theta_true = {'x_1': 2.0, 'x_2': -1.0}
target_degree = 2

# Setup network
net = Network(N)
alpha_norm(net, alpha_sd)

# Setup data model and network covariates
data_model = NonstationaryLogistic()
covariates = []
for name in theta_true:
    covariates.append(name)

    data_model.beta[name] = theta_true[name]

    # Pure-noise covariate: independent N(0, 1) draw per dyad.
    def f_x(i_1, i_2):
        return np.random.normal(0, 1.0)

    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Instantiate network according to data model
data_model.match_kappa(net, ('row_sum', target_degree))
net.generate(data_model)
net.show_heatmap(order_by_row='alpha_out')
# Initialize array
arr = Array(params['M'], params['N'])

# Generate node-level propensities to extend and receive edges.
# Exactly one heterogeneity scheme is chosen by the first positive SD.
if params['alpha_norm_sd'] > 0.0:
    alpha_norm(arr, params['alpha_norm_sd'])
elif params['alpha_unif_sd'] > 0.0:
    alpha_unif(arr, params['alpha_unif_sd'])
elif params['alpha_gamma_sd'] > 0.0:
    # Choosing location somewhat arbitrarily to give unit skewness
    alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
else:
    alpha_zero(arr)

# Generate covariates and associated coefficients
data_model = NonstationaryLogistic()
covariates = []
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    if name in params['beta_fixed']:
        data_model.beta[name] = params['beta_fixed'][name]
    else:
        data_model.beta[name] = np.random.normal(0, params['beta_sd'])

    # Uniform(-sqrt(3), sqrt(3)) has mean 0 and unit variance.
    def f_x(i_1, i_2):
        return np.random.uniform(-np.sqrt(3), np.sqrt(3))

    arr.new_edge_covariate(name).from_binary_function_ind(f_x)
data_model.match_kappa(arr, params['kappa_target'])
예제 #29
0
def do_experiment(params):
    if params['dump_fits'] and params['load_fits']:
        print 'Warning: simultaneously dumping and loading is a bad idea.'
        
    if params['dump_fits']:
        fits = []

    if params['load_fits']:
        with open(params['load_fits'], 'r') as fits_file:
            loaded_params_pick, loaded_fits = json.load(fits_file)

        loaded_params = dict([(k,unpick(v)) for (k,v) in loaded_params_pick])

        # Compare on parameters that control data generation and inference
        run_params = ['N', 'B', 'theta_sd', 'theta_fixed',
                      'alpha_unif_sd', 'alpha_norm_sd', 'alpha_gamma_sd',
                      'cov_unif_sd', 'cov_norm_sd', 'cov_disc_sd',
                      'kappa_target', 'pre_offset', 'post_fit',
                      'fit_nonstationary', 'fit_method', 'num_reps',
                      'is_T', 'sampling', 'sub_sizes_r', 'sub_sizes_c',
                      'random_seed']

        for p in run_params:
            if not np.all(loaded_params[p] == params[p]):
                print 'Warning: load mismatch on', p
    
    # Set random seed for reproducible output
    seed = Seed(params['random_seed'])

    # Initialize full network
    arr = Network(params['N'])

    # Generate node-level propensities to extend and receive edges
    if params['alpha_norm_sd'] > 0.0:
        alpha_norm(arr, params['alpha_norm_sd'])
    elif params['alpha_unif_sd'] > 0.0:
        alpha_unif(arr, params['alpha_unif_sd'])
    elif params['alpha_gamma_sd'] > 0.0:
        # Choosing location somewhat arbitrarily to give unit skewness
        alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
    else:
        alpha_zero(arr)

    # Generate covariates and associated coefficients
    data_model = NonstationaryLogistic()
    covariates = []
    for b in range(params['B']):
        name = 'x_%d' % b
        covariates.append(name)

        if name in params['theta_fixed']:
            data_model.beta[name] = params['theta_fixed'][name]
        else:
            data_model.beta[name] = np.random.normal(0, params['theta_sd'])

        if params['cov_unif_sd'] > 0.0:
            c = np.sqrt(12) / 2
            def f_x(i_1, i_2):
                return np.random.uniform(-c * params['cov_unif_sd'],
                                         c * params['cov_unif_sd'])
        elif params['cov_norm_sd'] > 0.0:
            def f_x(i_1, i_2):
                return np.random.normal(0, params['cov_norm_sd'])
        elif params['cov_disc_sd'] > 0.0:
            def f_x(i_1, i_2):
                return (params['cov_disc_sd'] *
                        (np.sign(np.random.random() - 0.5)))
        else:
            print 'Error: no covariate distribution specified.'
            sys.exit()

        arr.new_edge_covariate(name).from_binary_function_ind(f_x)

    # Generate large network, if necessary
    if not params['sampling'] == 'new':
        data_model.match_kappa(arr, params['kappa_target'])
        arr.generate(data_model)

    if params['fit_nonstationary']:
        fit_model = NonstationaryLogistic()
    else:
        fit_model = StationaryLogistic()
    for c in covariates:
        fit_model.beta[c] = None

    # Set up recording of results from experiment
    results = Results(params['sub_sizes_r'], params['sub_sizes_c'],
                      params['num_reps'], interactive = params['interactive'])
    add_array_stats(results)
    if params['plot_sig']:
        from scipy.stats import chi2
        crit = lambda dof: -0.5 * chi2.ppf(0.95, dof)

        umle_f = lambda n, f: f.nll(n, ignore_offset = True)
        umle_d = lambda n, d: d.nll(n, ignore_offset = True)
        umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset = True)
        results.new('UMLE F-N', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_n(n))
        results.new('UMLE F-D', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_d(n, d))

        cmle_a_f = lambda n, f: acnll(n.as_dense(), np.exp(f.edge_probabilities(n)))
        cmle_a_d = lambda n, d: acnll(n.as_dense(), np.exp(d.edge_probabilities(n)))
        cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense()))
        results.new('CMLE-A F-N', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n))
        results.new('CMLE-A F-D', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d))

        cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate = True, T = 50)
        cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate = True, T = 50)
        cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional(n, evaluate = True, T = 50)
        results.new('CMLE-IS F-N', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n))
        results.new('CMLE-IS F-D', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d))

        c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate = True)
        c_cmle_d = lambda n, d: d.fit_c_conditional(n, evaluate = True)
        c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional(n, evaluate = True)
        results.new('C-CMLE F-N', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n))
        results.new('C-CMLE F-D', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d))

        results.new('UMLE sig.', 'dof',
                    lambda M, N, B: crit((M - 1) + (N - 1) + 1 + B))
        results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B))
        results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B))

    if params['sampling'] == 'new':
        results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa)
    def true_est_theta_c(c):
        return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])
    for c in covariates:
        # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
        f_true, f_est = true_est_theta_c(c)
        results.new('True theta_{%s}' % c, 'm', f_true)
        results.new('Est. theta_{%s}' % c, 'm', f_est)
    if params['pre_offset'] or params['post_fit']:
        results.new('# Active', 'n',
                    lambda n: np.isfinite(n.offset.matrix()).sum())
    else:
        results.new('# Active', 'n', lambda n: n.M * n.N)
    if params['fisher_information']:
        def info_theta_c(c):
            def f_info_theta_c(d, f):
                return d.I_inv['theta_{%s}' % c]
            return f_info_theta_c
        for c in covariates:
            results.new('Info theta_{%s}' % c, 'm', info_theta_c(c))
    if params['baseline']:
        def rel_mse_p_ij(n, d, f):
            P = d.edge_probabilities(n)
            return rel_mse(f.edge_probabilities(n), f.baseline(n), P)
        results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij)
        if not (params['pre_offset'] or params['post_fit']):
            def rel_mse_logit_p_ij(n, d, f):
                logit_P = d.edge_probabilities(n, logit = True)
                logit_Q = f.baseline_logit(n)
                return rel_mse(f.edge_probabilities(n, logit = True),
                               logit_Q, logit_P)
            results.new('Rel. MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij)

    if params['fit_method'] in ['convex_opt', 'conditional', 'c_conditional',
                                'irls', 'conditional_is']:
        results.new('Wall time (sec.)', 'm',
                    lambda d, f: f.fit_info['wall_time'])
    if params['fit_method'] in ['convex_opt',
                                'conditional', 'conditional_is']:
        def work(f):
            w = 0
            for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']:
                if work_type in f.fit_info:
                    w += f.fit_info[work_type]
            return w
        results.new('Work', 'm', lambda d, f: work(f))
        results.new('||ET_final - T||_2', 'm',
                    lambda d, f: l2(f.fit_info['grad_nll_final']))

    for sub_size in zip(results.M_sizes, results.N_sizes):
        print 'subnetwork size =', sub_size

        if params['sampling'] == 'new':
            gen = RandomSubnetworks(arr, sub_size)
        else:
            gen = RandomSubnetworks(arr, sub_size,
                                    method = params['sampling'])

        for rep in range(params['num_reps']):
            seed.next()
            sub = gen.sample()

            if params['fisher_information']:
                data_model.fisher_information(sub)

            if params['sampling'] == 'new':
                data_model.match_kappa(sub, params['kappa_target'])
                sub.generate(data_model)

            if params['load_fits']:
                fit, loaded_fits = loaded_fits[0], loaded_fits[1:]
                fit_model.beta = unpick(fit['theta'])
                if params['fix_broken_cmle_is']:
                    for b_n in fit_model.beta:
                        fit_model.beta[b_n] += 0.1474
                if 'alpha' in fit:
                    sub.row_covariates['alpha_out'] = unpick(fit['alpha'])
                if 'beta' in fit:
                    sub.col_covariates['alpha_in'] = unpick(fit['beta'])
                if 'kappa' in fit:
                    fit_model.kappa = fit['kappa']
                if 'offset' in fit:
                    sub.offset = unpick(fit['offset'])
                if 'fit_info' in fit:
                    fit_model.fit_info = unpick(fit['fit_info'])
            else:
                if params['pre_offset']:
                    sub.offset_extremes()

                if params['fit_method'] == 'convex_opt':
                    fit_model.fit_convex_opt(sub,
                                             verbose = params['verbose'])
                elif params['fit_method'] == 'irls':
                    fit_model.fit_irls(sub, verbose = params['verbose'])
                elif params['fit_method'] == 'logistic':
                    fit_model.fit_logistic(sub)
                elif params['fit_method'] == 'logistic_l2':
                    fit_model.fit_logistic_l2(sub, prior_precision = 1.0)
                elif params['fit_method'] == 'conditional':
                    fit_model.fit_conditional(sub,
                                              verbose = params['verbose'])
                elif params['fit_method'] == 'conditional_is':
                    fit_model.fit_conditional(sub, T = params['is_T'],
                                              verbose = params['verbose'])
                elif params['fit_method'] == 'c_conditional':
                    fit_model.fit_c_conditional(sub,
                                                verbose = params['verbose'])
                elif params['fit_method'] == 'composite':
                    fit_model.fit_composite(sub, T = 100,
                                            verbose = params['verbose'])
                elif params['fit_method'] == 'brazzale':
                    fit_model.fit_brazzale(sub)
                elif params['fit_method'] == 'saddlepoint':
                    fit_model.fit_saddlepoint(sub)
                elif params['fit_method'] == 'none':
                    pass

                if params['post_fit']:
                    sub.offset_extremes()
                    fit_model.fit_convex_opt(sub, fix_beta = True)

                if params['dump_fits']:
                    fit = {}
                    fit['theta'] = pick(fit_model.beta)
                    if 'alpha_out' in sub.row_covariates:
                        fit['alpha'] = pick(sub.row_covariates['alpha_out'])
                    if 'alpha_in' in sub.row_covariates:
                        fit['beta'] = pick(sub.col_covariates['alpha_in'])
                    if not fit_model.kappa is None:
                        fit['kappa'] = fit_model.kappa
                    if not sub.offset is None:
                        sub.offset.dirty()
                        fit['offset'] = pick(sub.offset)
                    if not fit_model.fit_info is None:
                        fit['fit_info'] = pick(fit_model.fit_info)

                    fits.append(fit)

            if params['find_good'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err < params['find_good']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T = 1000,
                                              verbose = True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('goodmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, { 'Y': Y, 'X': X })
                    sys.exit()

            if params['find_bad'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err > params['find_bad']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T = 1000,
                                              verbose = True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('badmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, { 'Y': Y, 'X': X })
                    sys.exit()

            results.record(sub_size, rep, sub, data_model, fit_model)

            if params['verbose']:
                print

    if params['dump_fits']:
        with open(params['dump_fits'], 'w') as outfile:
            json.dump(([(p, pick(params[p])) for p in params], fits), outfile)

    # Compute beta MSEs
    covariate_naming = []
    for c in covariates:
        mse_name = 'MSE(theta_{%s})' % c
        true_name = 'True theta_{%s}' % c
        est_name = 'Est. theta_{%s}' % c
        results.estimate_mse(mse_name, true_name, est_name)
        covariate_naming.append((c, mse_name, true_name, est_name))

    # Report parameters for the run
    print 'Parameters:'
    for field in params:
        print '%s: %s' % (field, str(params[field]))

    # Should not vary between runs with the same seed and same number
    # of arrays tested
    seed.final()

    results.summary()

    return results, covariate_naming
# Render the network layout (``graph`` and ``pos`` are built earlier in the
# original script; they are not defined in this excerpt).
nx.draw(graph, pos, node_size=10, with_labels=False)

# --- Stationary logistic fit: shared kappa, no per-node effects -----------
print 'Fitting stationary model'
s_model = StationaryLogistic()
for cov_name in cov_names:
    # ``None`` marks the coefficient as free, to be estimated by fit().
    s_model.beta[cov_name] = None
s_model.fit(net, verbose=True)
print 'NLL: %.2f' % s_model.nll(net)
print 'kappa: %.2f' % s_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, s_model.beta[cov_name])
print

# --- Nonstationary logistic fit: adds row/column heterogeneity ------------
print 'Fitting nonstationary model'
# Attach zeroed alpha covariates so the nonstationary model has slots for
# the per-node effects it estimates.
alpha_zero(net)
ns_model = NonstationaryLogistic()
for cov_name in cov_names:
    ns_model.beta[cov_name] = None
ns_model.fit(net, verbose=True)
print 'NLL: %.2f' % ns_model.nll(net)
print 'kappa: %.2f' % ns_model.kappa
for cov_name in cov_names:
    print '%s: %.2f' % (cov_name, ns_model.beta[cov_name])
print

# --- Conditional fit: conditions on margins; T=0 selects the exact
# (non-importance-sampled) conditional likelihood -------------------------
print 'Fitting conditional model'
c_model = StationaryLogistic()
for cov_name in cov_names:
    c_model.beta[cov_name] = None
c_model.fit_conditional(net, T=0, verbose=True)
print 'NLL: %.2f' % c_model.nll(net)
예제 #31
0
def do_experiment(params):
    if params['dump_fits'] and params['load_fits']:
        print 'Warning: simultaneously dumping and loading is a bad idea.'

    if params['dump_fits']:
        fits = []

    if params['load_fits']:
        with open(params['load_fits'], 'r') as fits_file:
            loaded_params_pick, loaded_fits = json.load(fits_file)

        loaded_params = dict([(k, unpick(v)) for (k, v) in loaded_params_pick])

        # Compare on parameters that control data generation and inference
        run_params = [
            'N', 'B', 'theta_sd', 'theta_fixed', 'alpha_unif_sd',
            'alpha_norm_sd', 'alpha_gamma_sd', 'cov_unif_sd', 'cov_norm_sd',
            'cov_disc_sd', 'kappa_target', 'pre_offset', 'post_fit',
            'fit_nonstationary', 'fit_method', 'num_reps', 'is_T', 'sampling',
            'sub_sizes_r', 'sub_sizes_c', 'random_seed'
        ]

        for p in run_params:
            if not np.all(loaded_params[p] == params[p]):
                print 'Warning: load mismatch on', p

    # Set random seed for reproducible output
    seed = Seed(params['random_seed'])

    # Initialize full network
    arr = Network(params['N'])

    # Generate node-level propensities to extend and receive edges
    if params['alpha_norm_sd'] > 0.0:
        alpha_norm(arr, params['alpha_norm_sd'])
    elif params['alpha_unif_sd'] > 0.0:
        alpha_unif(arr, params['alpha_unif_sd'])
    elif params['alpha_gamma_sd'] > 0.0:
        # Choosing location somewhat arbitrarily to give unit skewness
        alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
    else:
        alpha_zero(arr)

    # Generate covariates and associated coefficients
    data_model = NonstationaryLogistic()
    covariates = []
    for b in range(params['B']):
        name = 'x_%d' % b
        covariates.append(name)

        if name in params['theta_fixed']:
            data_model.beta[name] = params['theta_fixed'][name]
        else:
            data_model.beta[name] = np.random.normal(0, params['theta_sd'])

        if params['cov_unif_sd'] > 0.0:
            c = np.sqrt(12) / 2

            def f_x(i_1, i_2):
                return np.random.uniform(-c * params['cov_unif_sd'],
                                         c * params['cov_unif_sd'])
        elif params['cov_norm_sd'] > 0.0:

            def f_x(i_1, i_2):
                return np.random.normal(0, params['cov_norm_sd'])
        elif params['cov_disc_sd'] > 0.0:

            def f_x(i_1, i_2):
                return (params['cov_disc_sd'] *
                        (np.sign(np.random.random() - 0.5)))
        else:
            print 'Error: no covariate distribution specified.'
            sys.exit()

        arr.new_edge_covariate(name).from_binary_function_ind(f_x)

    # Generate large network, if necessary
    if not params['sampling'] == 'new':
        data_model.match_kappa(arr, params['kappa_target'])
        arr.generate(data_model)

    if params['fit_nonstationary']:
        fit_model = NonstationaryLogistic()
    else:
        fit_model = StationaryLogistic()
    for c in covariates:
        fit_model.beta[c] = None

    # Set up recording of results from experiment
    results = Results(params['sub_sizes_r'],
                      params['sub_sizes_c'],
                      params['num_reps'],
                      interactive=params['interactive'])
    add_array_stats(results)
    if params['plot_sig']:
        from scipy.stats import chi2
        crit = lambda dof: -0.5 * chi2.ppf(0.95, dof)

        umle_f = lambda n, f: f.nll(n, ignore_offset=True)
        umle_d = lambda n, d: d.nll(n, ignore_offset=True)
        umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset=True)
        results.new('UMLE F-N', 'nm', lambda n, d, f: umle_f(n, f) - umle_n(n))
        results.new('UMLE F-D', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_d(n, d))

        cmle_a_f = lambda n, f: acnll(n.as_dense(),
                                      np.exp(f.edge_probabilities(n)))
        cmle_a_d = lambda n, d: acnll(n.as_dense(),
                                      np.exp(d.edge_probabilities(n)))
        cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense()))
        results.new('CMLE-A F-N', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n))
        results.new('CMLE-A F-D', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d))

        cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate=True, T=50)
        cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate=True, T=50)
        cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional(
            n, evaluate=True, T=50)
        results.new('CMLE-IS F-N', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n))
        results.new('CMLE-IS F-D', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d))

        c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate=True)
        c_cmle_d = lambda n, d: d.fit_c_conditional(n, evaluate=True)
        c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional(
            n, evaluate=True)
        results.new('C-CMLE F-N', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n))
        results.new('C-CMLE F-D', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d))

        results.new('UMLE sig.', 'dof', lambda M, N, B: crit((M - 1) +
                                                             (N - 1) + 1 + B))
        results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B))
        results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B))

    if params['sampling'] == 'new':
        results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa)

    def true_est_theta_c(c):
        return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])

    for c in covariates:
        # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
        f_true, f_est = true_est_theta_c(c)
        results.new('True theta_{%s}' % c, 'm', f_true)
        results.new('Est. theta_{%s}' % c, 'm', f_est)
    if params['pre_offset'] or params['post_fit']:
        results.new('# Active', 'n',
                    lambda n: np.isfinite(n.offset.matrix()).sum())
    else:
        results.new('# Active', 'n', lambda n: n.M * n.N)
    if params['fisher_information']:

        def info_theta_c(c):
            def f_info_theta_c(d, f):
                return d.I_inv['theta_{%s}' % c]

            return f_info_theta_c

        for c in covariates:
            results.new('Info theta_{%s}' % c, 'm', info_theta_c(c))
    if params['baseline']:

        def rel_mse_p_ij(n, d, f):
            P = d.edge_probabilities(n)
            return rel_mse(f.edge_probabilities(n), f.baseline(n), P)

        results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij)
        if not (params['pre_offset'] or params['post_fit']):

            def rel_mse_logit_p_ij(n, d, f):
                logit_P = d.edge_probabilities(n, logit=True)
                logit_Q = f.baseline_logit(n)
                return rel_mse(f.edge_probabilities(n, logit=True), logit_Q,
                               logit_P)

            results.new('Rel. MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij)

    if params['fit_method'] in [
            'convex_opt', 'conditional', 'c_conditional', 'irls',
            'conditional_is'
    ]:
        results.new('Wall time (sec.)', 'm',
                    lambda d, f: f.fit_info['wall_time'])
    if params['fit_method'] in ['convex_opt', 'conditional', 'conditional_is']:

        def work(f):
            w = 0
            for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']:
                if work_type in f.fit_info:
                    w += f.fit_info[work_type]
            return w

        results.new('Work', 'm', lambda d, f: work(f))
        results.new('||ET_final - T||_2', 'm',
                    lambda d, f: l2(f.fit_info['grad_nll_final']))

    for sub_size in zip(results.M_sizes, results.N_sizes):
        print 'subnetwork size =', sub_size

        if params['sampling'] == 'new':
            gen = RandomSubnetworks(arr, sub_size)
        else:
            gen = RandomSubnetworks(arr, sub_size, method=params['sampling'])

        for rep in range(params['num_reps']):
            seed.next()
            sub = gen.sample()

            if params['fisher_information']:
                data_model.fisher_information(sub)

            if params['sampling'] == 'new':
                data_model.match_kappa(sub, params['kappa_target'])
                sub.generate(data_model)

            if params['load_fits']:
                fit, loaded_fits = loaded_fits[0], loaded_fits[1:]
                fit_model.beta = unpick(fit['theta'])
                if params['fix_broken_cmle_is']:
                    for b_n in fit_model.beta:
                        fit_model.beta[b_n] += 0.1474
                if 'alpha' in fit:
                    sub.row_covariates['alpha_out'] = unpick(fit['alpha'])
                if 'beta' in fit:
                    sub.col_covariates['alpha_in'] = unpick(fit['beta'])
                if 'kappa' in fit:
                    fit_model.kappa = fit['kappa']
                if 'offset' in fit:
                    sub.offset = unpick(fit['offset'])
                if 'fit_info' in fit:
                    fit_model.fit_info = unpick(fit['fit_info'])
            else:
                if params['pre_offset']:
                    sub.offset_extremes()

                if params['fit_method'] == 'convex_opt':
                    fit_model.fit_convex_opt(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'irls':
                    fit_model.fit_irls(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'logistic':
                    fit_model.fit_logistic(sub)
                elif params['fit_method'] == 'logistic_l2':
                    fit_model.fit_logistic_l2(sub, prior_precision=1.0)
                elif params['fit_method'] == 'conditional':
                    fit_model.fit_conditional(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'conditional_is':
                    fit_model.fit_conditional(sub,
                                              T=params['is_T'],
                                              verbose=params['verbose'])
                elif params['fit_method'] == 'c_conditional':
                    fit_model.fit_c_conditional(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'composite':
                    fit_model.fit_composite(sub,
                                            T=100,
                                            verbose=params['verbose'])
                elif params['fit_method'] == 'brazzale':
                    fit_model.fit_brazzale(sub)
                elif params['fit_method'] == 'saddlepoint':
                    fit_model.fit_saddlepoint(sub)
                elif params['fit_method'] == 'none':
                    pass

                if params['post_fit']:
                    sub.offset_extremes()
                    fit_model.fit_convex_opt(sub, fix_beta=True)

                if params['dump_fits']:
                    fit = {}
                    fit['theta'] = pick(fit_model.beta)
                    if 'alpha_out' in sub.row_covariates:
                        fit['alpha'] = pick(sub.row_covariates['alpha_out'])
                    if 'alpha_in' in sub.row_covariates:
                        fit['beta'] = pick(sub.col_covariates['alpha_in'])
                    if not fit_model.kappa is None:
                        fit['kappa'] = fit_model.kappa
                    if not sub.offset is None:
                        sub.offset.dirty()
                        fit['offset'] = pick(sub.offset)
                    if not fit_model.fit_info is None:
                        fit['fit_info'] = pick(fit_model.fit_info)

                    fits.append(fit)

            if params['find_good'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err < params['find_good']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T=1000, verbose=True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('goodmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, {'Y': Y, 'X': X})
                    sys.exit()

            if params['find_bad'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err > params['find_bad']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T=1000, verbose=True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('badmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, {'Y': Y, 'X': X})
                    sys.exit()

            results.record(sub_size, rep, sub, data_model, fit_model)

            if params['verbose']:
                print

    if params['dump_fits']:
        with open(params['dump_fits'], 'w') as outfile:
            json.dump(([(p, pick(params[p])) for p in params], fits), outfile)

    # Compute beta MSEs
    covariate_naming = []
    for c in covariates:
        mse_name = 'MSE(theta_{%s})' % c
        true_name = 'True theta_{%s}' % c
        est_name = 'Est. theta_{%s}' % c
        results.estimate_mse(mse_name, true_name, est_name)
        covariate_naming.append((c, mse_name, true_name, est_name))

    # Report parameters for the run
    print 'Parameters:'
    for field in params:
        print '%s: %s' % (field, str(params[field]))

    # Should not vary between runs with the same seed and same number
    # of arrays tested
    seed.final()

    results.summary()

    return results, covariate_naming
예제 #32
0
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network with uniform node-level propensities
N = 300
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -7.0
covariates = ['x_%d' % i for i in range(5)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # Per-node latent positions; the edge covariate is 1 when two nodes'
    # positions are within 0.3 of each other (a similarity indicator).
    x_node = normal(0, 1.0, N)

    # f_x closes over this iteration's x_node; it is consumed immediately
    # below, so the loop's closure rebinding is not a problem here.
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.3

    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']
from Network import Network
from Models import NonstationaryLogistic, alpha_unif
from Experiment import RandomSubnetworks
from numpy.random import normal, seed

# Seed random number for reproducible results
seed(137)

# Initialize full network (smaller variant: N=100, denser threshold below)
N = 100
net = Network(N)
alpha_unif(net, 0.5)

# Initialize the data model; generate covariates and associated coefficients
data_model = NonstationaryLogistic()
data_model.kappa = -1.0
covariates = ['x_%d' % i for i in range(1)]
for covariate in covariates:
    data_model.beta[covariate] = normal(0, 1.0)

    # Per-node latent positions driving a similarity-indicator covariate.
    x_node = normal(0, 1.0, N)

    # Consumed immediately below, so the loop-closure rebinding is safe.
    def f_x(i_1, i_2):
        return abs(x_node[i_1] - x_node[i_2]) < 0.6

    net.new_edge_covariate(covariate).from_binary_function_ind(f_x)
net.generate(data_model)
net.offset_extremes()
net.show()
print 'True theta_0: %.2f' % data_model.beta['x_0']
예제 #34
0
# Build block-membership indicator covariates from the 'value' node
# covariate: ll = both left, rr = both right, lr = left-to-right.
# (``net``, ``theta``, ``alpha_sd`` are defined earlier in the original
# script; they are not visible in this excerpt.)
for v_1, v_2, name in [(0, 0, 'll'), (1, 1, 'rr'), (0, 1, 'lr')]:

    # Consumed immediately inside the loop, so closure rebinding is safe.
    def f_x(i_1, i_2):
        return ((net.node_covariates['value'][i_1] == v_1)
                and (net.node_covariates['value'][i_2] == v_2))

    net.new_edge_covariate(name).from_binary_function_ind(f_x)


# Continuous edge covariate: uniform with unit variance (half-width sqrt(3)).
def f_x(i_1, i_2):
    return np.random.uniform(-np.sqrt(3), np.sqrt(3))


net.new_edge_covariate('x').from_binary_function_ind(f_x)

# Ground-truth model: continuous effect theta plus fixed block effects.
data_model = NonstationaryLogistic()
data_model.beta['x'] = theta
for name, block_theta in [('ll', 4.0), ('rr', 3.0), ('lr', -2.0)]:
    data_model.beta[name] = block_theta
alpha_norm(net, alpha_sd)
data_model.match_kappa(net, ('row_sum', 2))
net.generate(data_model)
net.show_heatmap()
net.offset_extremes()

# Fitting model: a 2-block Blockmodel wrapping a nonstationary base model
# with only the continuous coefficient free.
fit_base_model = NonstationaryLogistic()
fit_base_model.beta['x'] = None
fit_model = Blockmodel(fit_base_model, 2)
#fit_model.base_model.fit = fit_model.base_model.fit_conditional

# Initialize block assignments: first half of the nodes get block 0, the
# second half block 1.
# NOTE(review): ``N/2`` relies on Python 2 integer division; under
# Python 3 this would need ``N // 2``.
net.new_node_covariate('value').from_pairs(net.names, [0]*(N/2) + [1]*(N/2))
# Indicator covariates for within/between-block dyads.
for v_1, v_2, name in [(0, 0, 'll'),
                       (1, 1, 'rr'),
                       (0, 1, 'lr')]:
    # Consumed immediately inside the loop, so closure rebinding is safe.
    def f_x(i_1, i_2):
        return ((net.node_covariates['value'][i_1] == v_1) and
                (net.node_covariates['value'][i_2] == v_2))

    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Continuous edge covariate: uniform with unit variance (half-width sqrt(3)).
def f_x(i_1, i_2):
    return np.random.uniform(-np.sqrt(3), np.sqrt(3))
net.new_edge_covariate('x').from_binary_function_ind(f_x)
        
# Ground-truth model: continuous effect theta plus fixed block effects.
data_model = NonstationaryLogistic()
data_model.beta['x'] = theta
for name, block_theta in [('ll', 4.0),
                          ('rr', 3.0),
                          ('lr', -2.0)]:
    data_model.beta[name] = block_theta
alpha_norm(net, alpha_sd)
data_model.match_kappa(net, ('row_sum', 2))
net.generate(data_model)
net.show_heatmap()
net.offset_extremes()

# Fitting model: a 2-block Blockmodel wrapping a nonstationary base model
# with only the continuous coefficient free.
fit_base_model = NonstationaryLogistic()
fit_base_model.beta['x'] = None
fit_model = Blockmodel(fit_base_model, 2)
#fit_model.base_model.fit = fit_model.base_model.fit_conditional
예제 #36
0
# Initialize full network
net = Network(params['N'])

# Generate node-level propensities to extend and receive edges.
# NOTE(review): this snippet reads params['alpha_unif'] while the other
# experiment scripts use 'alpha_unif_sd' — confirm the intended key name.
if params['alpha_norm_sd'] > 0.0:
    alpha_norm(net, params['alpha_norm_sd'])
elif params['alpha_unif'] > 0.0:
    alpha_unif(net, params['alpha_unif'])
elif params['alpha_gamma_sd'] > 0.0:
    # Choosing location somewhat arbitrarily to give unit skewness
    alpha_gamma(net, 4.0, params['alpha_gamma_sd'])
else:
    alpha_zero(net)

# Generate covariates and associated coefficients
data_base_model = NonstationaryLogistic()
covariates = []
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)

    data_base_model.beta[name] = np.random.normal(0, params['beta_sd'])

    # Continuous covariate: uniform with unit variance (half-width sqrt(3)).
    def f_x(i_1, i_2):
        return np.random.uniform(-np.sqrt(3), np.sqrt(3))

    net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Initialize data (block)model from base model: draw class proportions from
# a symmetric Dirichlet, then sample each node's class label z from them.
class_probs = np.random.dirichlet(np.repeat(params['class_conc'], params['K']))
z = np.where(np.random.multinomial(1, class_probs, params['N']) == 1)[1]
예제 #37
0
    med_degree = np.median(degree)
    net.new_node_covariate('low_degree').from_pairs(net.names,
                                                    degree < med_degree)
    for v_1, v_2, name in [(0, 0, 'high_to_high'), (1, 1, 'low_to_low'),
                           (0, 1, 'high_to_low')]:
        covariates.append(name)

        def f_x(i_1, i_2):
            return ((net.node_covariates['low_degree'][i_1] == v_1)
                    and (net.node_covariates['low_degree'][i_2] == v_2))

        net.new_edge_covariate(name).from_binary_function_ind(f_x)

# Construct the two candidate fitting models and register every covariate
# coefficient as a free (to-be-estimated) parameter.
fit_model = StationaryLogistic()
n_fit_model = NonstationaryLogistic()
for cov in covariates:
    for model in (fit_model, n_fit_model):
        model.beta[cov] = None

# Prepare the recorder that collects statistics over the experiment runs.
results = Results(params['sub_sizes'], params['num_reps'], 'Stationary fit')
add_network_stats(results)


def est_theta_c(c):
    # Factory returning a recorder that reads the fitted coefficient for
    # covariate ``c``.  Binding ``c`` here (rather than in a loop-body
    # lambda) avoids the late-binding closure pitfall.
    def f_est(d, f):
        return f.beta[c]
    return f_est


for c in covariates:
    f_est = est_theta_c(c)