# Example #1
def do_experiment(params):
    seed = Seed(params['random_seed'])

    alpha = params['alpha_level']
    verbose = params['verbose']

    L = params['L']
    S = len(params['n_MC_levels'])
    T = params['is_T']

    # Set up structure and methods for recording results
    results = {'completed_trials': 0}
    for method, display in [('cmle_a', 'CMLE-A'),
                            ('cmle_is', 'CMLE-IS (T = %d)' % T)] + \
                           [('cons_%d' % s, 'Conservative (n = %d)' % n_MC)
                            for s, n_MC in enumerate(params['n_MC_levels'])]:
        results[method] = {
            'display': display,
            'in_interval': [],
            'length': [],
            'total_time': 0.0
        }

    def do_and_record(out, name):
        ci, elapsed = out
        ci_l, ci_u = ci

        result = results[name]

        print '%s (%.2f sec): [%.2f, %.2f]' % \
          (result['display'], elapsed, ci_l, ci_u)

        result['in_interval'].append(ci_l <= params['theta'] <= ci_u)
        result['length'].append(ci_u - ci_l)
        result['total_time'] += elapsed

    # Do experiment
    for X, v in generate_data(params, seed):
        if (results['completed_trials'] == params['n_rep']) or terminated:
            break

        theta_grid = np.linspace(params['theta_l'], params['theta_u'], L)

        do_and_record(ci_cmle_a(X, v, theta_grid, alpha), 'cmle_a')

        do_and_record(ci_cmle_is(X, v, theta_grid, alpha, T, verbose),
                      'cmle_is')

        for s, n_MC in enumerate(params['n_MC_levels']):
            do_and_record(
                ci_conservative(X, v, n_MC, theta_grid, alpha, verbose),
                'cons_%d' % s)

        results['completed_trials'] += 1

    # For verifying that same data was generated even if different
    # algorithms consumed a different amount of randomness
    seed.final()

    return results
def do_experiment(params):
    seed = Seed(params['random_seed'])

    alpha = params['alpha_level']
    verbose = params['verbose']

    L = params['L']
    S = len(params['n_MC_levels'])
    T = params['is_T']
    
    # Set up structure and methods for recording results
    results = { 'completed_trials': 0 }
    for method, display in [('cmle_a', 'CMLE-A'),
                            ('cmle_is', 'CMLE-IS (T = %d)' % T)] + \
                           [('cons_%d' % s, 'Conservative (n = %d)' % n_MC)
                            for s, n_MC in enumerate(params['n_MC_levels'])]:
        results[method] = { 'display': display,
                            'in_interval': [],
                            'length': [],
                            'total_time': 0.0 }
    def do_and_record(out, name):
        ci, elapsed = out
        ci_l, ci_u = ci

        result = results[name]

        print '%s (%.2f sec): [%.2f, %.2f]' % \
          (result['display'], elapsed, ci_l, ci_u)

        result['in_interval'].append(ci_l <= params['theta'] <= ci_u)
        result['length'].append(ci_u - ci_l)
        result['total_time'] += elapsed

    # Do experiment
    for X, v in generate_data(params, seed):
        if (results['completed_trials'] == params['n_rep']) or terminated:
            break

        theta_grid = np.linspace(params['theta_l'], params['theta_u'], L)

        do_and_record(ci_cmle_a(X, v, theta_grid, alpha),
                      'cmle_a')

        do_and_record(ci_cmle_is(X, v, theta_grid, alpha, T, verbose),
                      'cmle_is')

        for s, n_MC in enumerate(params['n_MC_levels']):
            do_and_record(ci_conservative(X, v, n_MC, theta_grid, alpha,
                                          verbose),
                          'cons_%d' % s)

        results['completed_trials'] += 1

    # For verifying that same data was generated even if different
    # algorithms consumed a different amount of randomness
    seed.final()

    return results
def do_experiment(params):
    if params['dump_fits'] and params['load_fits']:
        print 'Warning: simultaneously dumping and loading is a bad idea.'

    if params['dump_fits']:
        fits = []

    if params['load_fits']:
        with open(params['load_fits'], 'r') as fits_file:
            loaded_params_pick, loaded_fits = json.load(fits_file)

        loaded_params = dict([(k, unpick(v)) for (k, v) in loaded_params_pick])

        # Compare on parameters that control data generation and inference
        run_params = [
            'N', 'B', 'theta_sd', 'theta_fixed', 'alpha_unif_sd',
            'alpha_norm_sd', 'alpha_gamma_sd', 'cov_unif_sd', 'cov_norm_sd',
            'cov_disc_sd', 'kappa_target', 'pre_offset', 'post_fit',
            'fit_nonstationary', 'fit_method', 'num_reps', 'is_T', 'sampling',
            'sub_sizes_r', 'sub_sizes_c', 'random_seed'
        ]

        for p in run_params:
            if not np.all(loaded_params[p] == params[p]):
                print 'Warning: load mismatch on', p

    # Set random seed for reproducible output
    seed = Seed(params['random_seed'])

    # Initialize full network
    arr = Network(params['N'])

    # Generate node-level propensities to extend and receive edges
    if params['alpha_norm_sd'] > 0.0:
        alpha_norm(arr, params['alpha_norm_sd'])
    elif params['alpha_unif_sd'] > 0.0:
        alpha_unif(arr, params['alpha_unif_sd'])
    elif params['alpha_gamma_sd'] > 0.0:
        # Choosing location somewhat arbitrarily to give unit skewness
        alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
    else:
        alpha_zero(arr)

    # Generate covariates and associated coefficients
    data_model = NonstationaryLogistic()
    covariates = []
    for b in range(params['B']):
        name = 'x_%d' % b
        covariates.append(name)

        if name in params['theta_fixed']:
            data_model.beta[name] = params['theta_fixed'][name]
        else:
            data_model.beta[name] = np.random.normal(0, params['theta_sd'])

        if params['cov_unif_sd'] > 0.0:
            c = np.sqrt(12) / 2

            def f_x(i_1, i_2):
                return np.random.uniform(-c * params['cov_unif_sd'],
                                         c * params['cov_unif_sd'])
        elif params['cov_norm_sd'] > 0.0:

            def f_x(i_1, i_2):
                return np.random.normal(0, params['cov_norm_sd'])
        elif params['cov_disc_sd'] > 0.0:

            def f_x(i_1, i_2):
                return (params['cov_disc_sd'] *
                        (np.sign(np.random.random() - 0.5)))
        else:
            print 'Error: no covariate distribution specified.'
            sys.exit()

        arr.new_edge_covariate(name).from_binary_function_ind(f_x)

    # Generate large network, if necessary
    if not params['sampling'] == 'new':
        data_model.match_kappa(arr, params['kappa_target'])
        arr.generate(data_model)

    if params['fit_nonstationary']:
        fit_model = NonstationaryLogistic()
    else:
        fit_model = StationaryLogistic()
    for c in covariates:
        fit_model.beta[c] = None

    # Set up recording of results from experiment
    results = Results(params['sub_sizes_r'],
                      params['sub_sizes_c'],
                      params['num_reps'],
                      interactive=params['interactive'])
    add_array_stats(results)
    if params['plot_sig']:
        from scipy.stats import chi2
        crit = lambda dof: -0.5 * chi2.ppf(0.95, dof)

        umle_f = lambda n, f: f.nll(n, ignore_offset=True)
        umle_d = lambda n, d: d.nll(n, ignore_offset=True)
        umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset=True)
        results.new('UMLE F-N', 'nm', lambda n, d, f: umle_f(n, f) - umle_n(n))
        results.new('UMLE F-D', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_d(n, d))

        cmle_a_f = lambda n, f: acnll(n.as_dense(),
                                      np.exp(f.edge_probabilities(n)))
        cmle_a_d = lambda n, d: acnll(n.as_dense(),
                                      np.exp(d.edge_probabilities(n)))
        cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense()))
        results.new('CMLE-A F-N', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n))
        results.new('CMLE-A F-D', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d))

        cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate=True, T=50)
        cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate=True, T=50)
        cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional(
            n, evaluate=True, T=50)
        results.new('CMLE-IS F-N', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n))
        results.new('CMLE-IS F-D', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d))

        c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate=True)
        c_cmle_d = lambda n, d: d.fit_c_conditional(n, evaluate=True)
        c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional(
            n, evaluate=True)
        results.new('C-CMLE F-N', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n))
        results.new('C-CMLE F-D', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d))

        results.new('UMLE sig.', 'dof', lambda M, N, B: crit((M - 1) +
                                                             (N - 1) + 1 + B))
        results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B))
        results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B))

    if params['sampling'] == 'new':
        results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa)

    def true_est_theta_c(c):
        return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])

    for c in covariates:
        # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
        f_true, f_est = true_est_theta_c(c)
        results.new('True theta_{%s}' % c, 'm', f_true)
        results.new('Est. theta_{%s}' % c, 'm', f_est)
    if params['pre_offset'] or params['post_fit']:
        results.new('# Active', 'n',
                    lambda n: np.isfinite(n.offset.matrix()).sum())
    else:
        results.new('# Active', 'n', lambda n: n.M * n.N)
    if params['fisher_information']:

        def info_theta_c(c):
            def f_info_theta_c(d, f):
                return d.I_inv['theta_{%s}' % c]

            return f_info_theta_c

        for c in covariates:
            results.new('Info theta_{%s}' % c, 'm', info_theta_c(c))
    if params['baseline']:

        def rel_mse_p_ij(n, d, f):
            P = d.edge_probabilities(n)
            return rel_mse(f.edge_probabilities(n), f.baseline(n), P)

        results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij)
        if not (params['pre_offset'] or params['post_fit']):

            def rel_mse_logit_p_ij(n, d, f):
                logit_P = d.edge_probabilities(n, logit=True)
                logit_Q = f.baseline_logit(n)
                return rel_mse(f.edge_probabilities(n, logit=True), logit_Q,
                               logit_P)

            results.new('Rel. MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij)

    if params['fit_method'] in [
            'convex_opt', 'conditional', 'c_conditional', 'irls',
            'conditional_is'
    ]:
        results.new('Wall time (sec.)', 'm',
                    lambda d, f: f.fit_info['wall_time'])
    if params['fit_method'] in ['convex_opt', 'conditional', 'conditional_is']:

        def work(f):
            w = 0
            for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']:
                if work_type in f.fit_info:
                    w += f.fit_info[work_type]
            return w

        results.new('Work', 'm', lambda d, f: work(f))
        results.new('||ET_final - T||_2', 'm',
                    lambda d, f: l2(f.fit_info['grad_nll_final']))

    for sub_size in zip(results.M_sizes, results.N_sizes):
        print 'subnetwork size =', sub_size

        if params['sampling'] == 'new':
            gen = RandomSubnetworks(arr, sub_size)
        else:
            gen = RandomSubnetworks(arr, sub_size, method=params['sampling'])

        for rep in range(params['num_reps']):
            seed.next()
            sub = gen.sample()

            if params['fisher_information']:
                data_model.fisher_information(sub)

            if params['sampling'] == 'new':
                data_model.match_kappa(sub, params['kappa_target'])
                sub.generate(data_model)

            if params['load_fits']:
                fit, loaded_fits = loaded_fits[0], loaded_fits[1:]
                fit_model.beta = unpick(fit['theta'])
                if params['fix_broken_cmle_is']:
                    for b_n in fit_model.beta:
                        fit_model.beta[b_n] += 0.1474
                if 'alpha' in fit:
                    sub.row_covariates['alpha_out'] = unpick(fit['alpha'])
                if 'beta' in fit:
                    sub.col_covariates['alpha_in'] = unpick(fit['beta'])
                if 'kappa' in fit:
                    fit_model.kappa = fit['kappa']
                if 'offset' in fit:
                    sub.offset = unpick(fit['offset'])
                if 'fit_info' in fit:
                    fit_model.fit_info = unpick(fit['fit_info'])
            else:
                if params['pre_offset']:
                    sub.offset_extremes()

                if params['fit_method'] == 'convex_opt':
                    fit_model.fit_convex_opt(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'irls':
                    fit_model.fit_irls(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'logistic':
                    fit_model.fit_logistic(sub)
                elif params['fit_method'] == 'logistic_l2':
                    fit_model.fit_logistic_l2(sub, prior_precision=1.0)
                elif params['fit_method'] == 'conditional':
                    fit_model.fit_conditional(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'conditional_is':
                    fit_model.fit_conditional(sub,
                                              T=params['is_T'],
                                              verbose=params['verbose'])
                elif params['fit_method'] == 'c_conditional':
                    fit_model.fit_c_conditional(sub, verbose=params['verbose'])
                elif params['fit_method'] == 'composite':
                    fit_model.fit_composite(sub,
                                            T=100,
                                            verbose=params['verbose'])
                elif params['fit_method'] == 'brazzale':
                    fit_model.fit_brazzale(sub)
                elif params['fit_method'] == 'saddlepoint':
                    fit_model.fit_saddlepoint(sub)
                elif params['fit_method'] == 'none':
                    pass

                if params['post_fit']:
                    sub.offset_extremes()
                    fit_model.fit_convex_opt(sub, fix_beta=True)

                if params['dump_fits']:
                    fit = {}
                    fit['theta'] = pick(fit_model.beta)
                    if 'alpha_out' in sub.row_covariates:
                        fit['alpha'] = pick(sub.row_covariates['alpha_out'])
                    if 'alpha_in' in sub.row_covariates:
                        fit['beta'] = pick(sub.col_covariates['alpha_in'])
                    if not fit_model.kappa is None:
                        fit['kappa'] = fit_model.kappa
                    if not sub.offset is None:
                        sub.offset.dirty()
                        fit['offset'] = pick(sub.offset)
                    if not fit_model.fit_info is None:
                        fit['fit_info'] = pick(fit_model.fit_info)

                    fits.append(fit)

            if params['find_good'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err < params['find_good']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T=1000, verbose=True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('goodmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, {'Y': Y, 'X': X})
                    sys.exit()

            if params['find_bad'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err > params['find_bad']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T=1000, verbose=True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('badmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, {'Y': Y, 'X': X})
                    sys.exit()

            results.record(sub_size, rep, sub, data_model, fit_model)

            if params['verbose']:
                print

    if params['dump_fits']:
        with open(params['dump_fits'], 'w') as outfile:
            json.dump(([(p, pick(params[p])) for p in params], fits), outfile)

    # Compute beta MSEs
    covariate_naming = []
    for c in covariates:
        mse_name = 'MSE(theta_{%s})' % c
        true_name = 'True theta_{%s}' % c
        est_name = 'Est. theta_{%s}' % c
        results.estimate_mse(mse_name, true_name, est_name)
        covariate_naming.append((c, mse_name, true_name, est_name))

    # Report parameters for the run
    print 'Parameters:'
    for field in params:
        print '%s: %s' % (field, str(params[field]))

    # Should not vary between runs with the same seed and same number
    # of arrays tested
    seed.final()

    results.summary()

    return results, covariate_naming
def do_experiment(params):
    seed = Seed(params['random_seed'])

    alpha_level = params['alpha_level']
    verbose = params['verbose']

    L = params['L']
    S = len(params['n_MC_levels'])
    T = params['is_T']
    
    # Set up structure and methods for recording results
    results = { 'completed_trials': 0 }
    for method, disp in [#('umle_wald', 'UMLE Wald'),
                         #('umle_boot', 'UMLE bootstrap (pivotal)'),
                         #('cmle_wald', 'CMLE Wald'),
                         #('cmle_boot', 'CMLE bootstrap (pivotal)'),
                         #('brazzale', 'Conditional (Brazzale)'),
                         #('umle', 'UMLE LR'),
                         #('cmle_a', 'CMLE-A LR'),
                         #('cmle_is', 'CMLE-IS (T = %d) LR' % T)
                        ] + \
                        [('is_sc_c_%d' % n_MC, 'IS-score (n = %d)' % n_MC)
                         for n_MC in params['n_MC_levels']] + \
                        [('is_lr_c_%d' % n_MC, 'IS-LR (n = %d)' % n_MC)
                         for n_MC in params['n_MC_levels']]:
                        #[('is_sc_u_%d' % n_MC, 'IS-score [un] (n = %d)' % n_MC)
                        # for n_MC in params['n_MC_levels']] + \
                        #[('is_lr_u_%d' % n_MC, 'IS-LR [un] (n = %d)' % n_MC)
                        # for n_MC in params['n_MC_levels']] + \
        results[method] = { 'display': disp,
                            'in_interval': [],
                            'length': [],
                            'total_time': 0.0 }
    def do(out, name):
        ci, elapsed = out
        ci_l, ci_u = ci

        result = results[name]

        print '%s (%.2f sec): [%.2f, %.2f]' % \
          (result['display'], elapsed, ci_l, ci_u)

        result['in_interval'].append(ci_l <= params['theta'] <= ci_u)
        result['length'].append(ci_u - ci_l)
        result['total_time'] += elapsed

    # Do experiment
    for X, v in generate_data(params['case'], params['theta'], seed):
        if (results['completed_trials'] == params['n_rep']) or terminated:
            break

        theta_grid = np.linspace(params['theta_l'], params['theta_u'], L)

        #do(ci_umle_wald(X, v, alpha_level), 'umle_wald')

        #do(ci_umle_boot(X, v, alpha_level), 'umle_boot')

        #do(ci_cmle_wald(X, v, alpha_level), 'cmle_wald')

        #do(ci_cmle_boot(X, v, alpha_level), 'cmle_boot')

        #do(ci_brazzale(X, v, alpha_level), 'brazzale')

        #do(ci_umle(X, v, theta_grid, alpha_level), 'umle')

        #do(ci_cmle_a(X, v, theta_grid, alpha_level), 'cmle_a')

        #do(ci_cmle_is(X, v, theta_grid, alpha_level, T, verbose), 'cmle_is')

        for n_MC in params['n_MC_levels']:
            for test in ['lr', 'score']:
                for corrected_str, corrected in [('c', True)]: #, ('u', False)]:
                    do(ci_cons(X, v, alpha_level, params['L'],
                               params['theta_l'], params['theta_u'],
                               n_MC, test = test, corrected = corrected,
                               verbose = verbose),
                       'is_%s_%s_%d' % (test[0:2], corrected_str, n_MC))

        results['completed_trials'] += 1

    # For verifying that same data was generated even if different
    # algorithms consumed a different amount of randomness
    seed.final()

    return results
def do_experiment(params):
    seed = Seed(params["random_seed"])

    alpha_level = params["alpha_level"]
    verbose = params["verbose"]

    L = params["L"]
    S = len(params["n_MC_levels"])
    T = params["is_T"]

    # Set up structure and methods for recording results
    results = {"completed_trials": 0}
    for method, disp in (
        [  # ('umle_wald', 'UMLE Wald'),
            # ('umle_boot', 'UMLE bootstrap (pivotal)'),
            # ('cmle_wald', 'CMLE Wald'),
            # ('cmle_boot', 'CMLE bootstrap (pivotal)'),
            # ('brazzale', 'Conditional (Brazzale)'),
            ("umle", "UMLE LR"),
            ("cmle_a", "CMLE-A LR"),
            ("cmle_is", "CMLE-IS (T = %d) LR" % T),
        ]
        + [("is_sc_c_%d" % n_MC, "IS-score (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]]
        + [("is_sc_u_%d" % n_MC, "IS-score [un] (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]]
        + [("is_lr_c_%d" % n_MC, "IS-LR (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]]
        + [("is_lr_u_%d" % n_MC, "IS-LR [un] (n = %d)" % n_MC) for n_MC in params["n_MC_levels"]]
    ):
        results[method] = {"display": disp, "in_interval": [], "length": [], "total_time": 0.0}

    def do(out, name):
        ci, elapsed = out
        ci_l, ci_u = ci

        result = results[name]

        print "%s (%.2f sec): [%.2f, %.2f]" % (result["display"], elapsed, ci_l, ci_u)

        result["in_interval"].append(ci_l <= params["theta"] <= ci_u)
        result["length"].append(ci_u - ci_l)
        result["total_time"] += elapsed

    # Do experiment
    for X, v in generate_data(params["case"], params["theta"], seed):
        if (results["completed_trials"] == params["n_rep"]) or terminated:
            break

        theta_grid = np.linspace(params["theta_l"], params["theta_u"], L)

        # do(ci_umle_wald(X, v, alpha_level), 'umle_wald')

        # do(ci_umle_boot(X, v, alpha_level), 'umle_boot')

        # do(ci_cmle_wald(X, v, alpha_level), 'cmle_wald')

        # do(ci_cmle_boot(X, v, alpha_level), 'cmle_boot')

        # do(ci_brazzale(X, v, alpha_level), 'brazzale')

        do(ci_umle(X, v, theta_grid, alpha_level), "umle")

        do(ci_cmle_a(X, v, theta_grid, alpha_level), "cmle_a")

        do(ci_cmle_is(X, v, theta_grid, alpha_level, T, verbose), "cmle_is")

        for n_MC in params["n_MC_levels"]:
            for test in ["lr", "score"]:
                for corrected_str, corrected in [("c", True), ("u", False)]:
                    do(
                        ci_cons(
                            X,
                            v,
                            alpha_level,
                            params["L"],
                            params["theta_l"],
                            params["theta_u"],
                            n_MC,
                            test=test,
                            corrected=corrected,
                            verbose=verbose,
                        ),
                        "is_%s_%s_%d" % (test[0:2], corrected_str, n_MC),
                    )

        results["completed_trials"] += 1

    # For verifying that same data was generated even if different
    # algorithms consumed a different amount of randomness
    seed.final()

    return results
def do_experiment(params):
    if params['dump_fits'] and params['load_fits']:
        print 'Warning: simultaneously dumping and loading is a bad idea.'
        
    if params['dump_fits']:
        fits = []

    if params['load_fits']:
        with open(params['load_fits'], 'r') as fits_file:
            loaded_params_pick, loaded_fits = json.load(fits_file)

        loaded_params = dict([(k,unpick(v)) for (k,v) in loaded_params_pick])

        # Compare on parameters that control data generation and inference
        run_params = ['N', 'B', 'theta_sd', 'theta_fixed',
                      'alpha_unif_sd', 'alpha_norm_sd', 'alpha_gamma_sd',
                      'cov_unif_sd', 'cov_norm_sd', 'cov_disc_sd',
                      'kappa_target', 'pre_offset', 'post_fit',
                      'fit_nonstationary', 'fit_method', 'num_reps',
                      'is_T', 'sampling', 'sub_sizes_r', 'sub_sizes_c',
                      'random_seed']

        for p in run_params:
            if not np.all(loaded_params[p] == params[p]):
                print 'Warning: load mismatch on', p
    
    # Set random seed for reproducible output
    seed = Seed(params['random_seed'])

    # Initialize full network
    arr = Network(params['N'])

    # Generate node-level propensities to extend and receive edges
    if params['alpha_norm_sd'] > 0.0:
        alpha_norm(arr, params['alpha_norm_sd'])
    elif params['alpha_unif_sd'] > 0.0:
        alpha_unif(arr, params['alpha_unif_sd'])
    elif params['alpha_gamma_sd'] > 0.0:
        # Choosing location somewhat arbitrarily to give unit skewness
        alpha_gamma(arr, 4.0, params['alpha_gamma_sd'])
    else:
        alpha_zero(arr)

    # Generate covariates and associated coefficients
    data_model = NonstationaryLogistic()
    covariates = []
    for b in range(params['B']):
        name = 'x_%d' % b
        covariates.append(name)

        if name in params['theta_fixed']:
            data_model.beta[name] = params['theta_fixed'][name]
        else:
            data_model.beta[name] = np.random.normal(0, params['theta_sd'])

        if params['cov_unif_sd'] > 0.0:
            c = np.sqrt(12) / 2
            def f_x(i_1, i_2):
                return np.random.uniform(-c * params['cov_unif_sd'],
                                         c * params['cov_unif_sd'])
        elif params['cov_norm_sd'] > 0.0:
            def f_x(i_1, i_2):
                return np.random.normal(0, params['cov_norm_sd'])
        elif params['cov_disc_sd'] > 0.0:
            def f_x(i_1, i_2):
                return (params['cov_disc_sd'] *
                        (np.sign(np.random.random() - 0.5)))
        else:
            print 'Error: no covariate distribution specified.'
            sys.exit()

        arr.new_edge_covariate(name).from_binary_function_ind(f_x)

    # Generate large network, if necessary
    if not params['sampling'] == 'new':
        data_model.match_kappa(arr, params['kappa_target'])
        arr.generate(data_model)

    if params['fit_nonstationary']:
        fit_model = NonstationaryLogistic()
    else:
        fit_model = StationaryLogistic()
    for c in covariates:
        fit_model.beta[c] = None

    # Set up recording of results from experiment
    results = Results(params['sub_sizes_r'], params['sub_sizes_c'],
                      params['num_reps'], interactive = params['interactive'])
    add_array_stats(results)
    if params['plot_sig']:
        from scipy.stats import chi2
        crit = lambda dof: -0.5 * chi2.ppf(0.95, dof)

        umle_f = lambda n, f: f.nll(n, ignore_offset = True)
        umle_d = lambda n, d: d.nll(n, ignore_offset = True)
        umle_n = lambda n: NonstationaryLogistic().nll(n, ignore_offset = True)
        results.new('UMLE F-N', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_n(n))
        results.new('UMLE F-D', 'nm',
                    lambda n, d, f: umle_f(n, f) - umle_d(n, d))

        cmle_a_f = lambda n, f: acnll(n.as_dense(), np.exp(f.edge_probabilities(n)))
        cmle_a_d = lambda n, d: acnll(n.as_dense(), np.exp(d.edge_probabilities(n)))
        cmle_a_n = lambda n: acnll(n.as_dense(), np.ones_like(n.as_dense()))
        results.new('CMLE-A F-N', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_n(n))
        results.new('CMLE-A F-D', 'nm',
                    lambda n, d, f: cmle_a_f(n, f) - cmle_a_d(n, d))

        cmle_is_f = lambda n, f: f.fit_conditional(n, evaluate = True, T = 50)
        cmle_is_d = lambda n, d: d.fit_conditional(n, evaluate = True, T = 50)
        cmle_is_n = lambda n: NonstationaryLogistic().fit_conditional(n, evaluate = True, T = 50)
        results.new('CMLE-IS F-N', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_n(n))
        results.new('CMLE-IS F-D', 'nm',
                    lambda n, d, f: cmle_is_f(n, f) - cmle_is_d(n, d))

        c_cmle_f = lambda n, f: f.fit_c_conditional(n, evaluate = True)
        c_cmle_d = lambda n, d: d.fit_c_conditional(n, evaluate = True)
        c_cmle_n = lambda n: NonstationaryLogistic().fit_c_conditional(n, evaluate = True)
        results.new('C-CMLE F-N', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_n(n))
        results.new('C-CMLE F-D', 'nm',
                    lambda n, d, f: c_cmle_f(n, f) - c_cmle_d(n, d))

        results.new('UMLE sig.', 'dof',
                    lambda M, N, B: crit((M - 1) + (N - 1) + 1 + B))
        results.new('CMLE sig.', 'dof', lambda M, N, B: crit(B))
        results.new('C-CMLE sig.', 'dof', lambda M, N, B: crit((M - 1) + B))

    if params['sampling'] == 'new':
        results.new('Subnetwork kappa', 'm', lambda d, f: d.kappa)
    def true_est_theta_c(c):
        return (lambda d, f: d.beta[c]), (lambda d, f: f.beta[c])
    for c in covariates:
        # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
        f_true, f_est = true_est_theta_c(c)
        results.new('True theta_{%s}' % c, 'm', f_true)
        results.new('Est. theta_{%s}' % c, 'm', f_est)
    if params['pre_offset'] or params['post_fit']:
        results.new('# Active', 'n',
                    lambda n: np.isfinite(n.offset.matrix()).sum())
    else:
        results.new('# Active', 'n', lambda n: n.M * n.N)
    if params['fisher_information']:
        def info_theta_c(c):
            def f_info_theta_c(d, f):
                return d.I_inv['theta_{%s}' % c]
            return f_info_theta_c
        for c in covariates:
            results.new('Info theta_{%s}' % c, 'm', info_theta_c(c))
    if params['baseline']:
        def rel_mse_p_ij(n, d, f):
            P = d.edge_probabilities(n)
            return rel_mse(f.edge_probabilities(n), f.baseline(n), P)
        results.new('Rel. MSE(P_ij)', 'nm', rel_mse_p_ij)
        if not (params['pre_offset'] or params['post_fit']):
            def rel_mse_logit_p_ij(n, d, f):
                logit_P = d.edge_probabilities(n, logit = True)
                logit_Q = f.baseline_logit(n)
                return rel_mse(f.edge_probabilities(n, logit = True),
                               logit_Q, logit_P)
            results.new('Rel. MSE(logit P_ij)', 'nm', rel_mse_logit_p_ij)

    if params['fit_method'] in ['convex_opt', 'conditional', 'c_conditional',
                                'irls', 'conditional_is']:
        results.new('Wall time (sec.)', 'm',
                    lambda d, f: f.fit_info['wall_time'])
    if params['fit_method'] in ['convex_opt',
                                'conditional', 'conditional_is']:
        def work(f):
            w = 0
            for work_type in ['nll_evals', 'grad_nll_evals', 'cnll_evals']:
                if work_type in f.fit_info:
                    w += f.fit_info[work_type]
            return w
        results.new('Work', 'm', lambda d, f: work(f))
        results.new('||ET_final - T||_2', 'm',
                    lambda d, f: l2(f.fit_info['grad_nll_final']))

    for sub_size in zip(results.M_sizes, results.N_sizes):
        print 'subnetwork size =', sub_size

        if params['sampling'] == 'new':
            gen = RandomSubnetworks(arr, sub_size)
        else:
            gen = RandomSubnetworks(arr, sub_size,
                                    method = params['sampling'])

        for rep in range(params['num_reps']):
            seed.next()
            sub = gen.sample()

            if params['fisher_information']:
                data_model.fisher_information(sub)

            if params['sampling'] == 'new':
                data_model.match_kappa(sub, params['kappa_target'])
                sub.generate(data_model)

            if params['load_fits']:
                fit, loaded_fits = loaded_fits[0], loaded_fits[1:]
                fit_model.beta = unpick(fit['theta'])
                if params['fix_broken_cmle_is']:
                    for b_n in fit_model.beta:
                        fit_model.beta[b_n] += 0.1474
                if 'alpha' in fit:
                    sub.row_covariates['alpha_out'] = unpick(fit['alpha'])
                if 'beta' in fit:
                    sub.col_covariates['alpha_in'] = unpick(fit['beta'])
                if 'kappa' in fit:
                    fit_model.kappa = fit['kappa']
                if 'offset' in fit:
                    sub.offset = unpick(fit['offset'])
                if 'fit_info' in fit:
                    fit_model.fit_info = unpick(fit['fit_info'])
            else:
                if params['pre_offset']:
                    sub.offset_extremes()

                if params['fit_method'] == 'convex_opt':
                    fit_model.fit_convex_opt(sub,
                                             verbose = params['verbose'])
                elif params['fit_method'] == 'irls':
                    fit_model.fit_irls(sub, verbose = params['verbose'])
                elif params['fit_method'] == 'logistic':
                    fit_model.fit_logistic(sub)
                elif params['fit_method'] == 'logistic_l2':
                    fit_model.fit_logistic_l2(sub, prior_precision = 1.0)
                elif params['fit_method'] == 'conditional':
                    fit_model.fit_conditional(sub,
                                              verbose = params['verbose'])
                elif params['fit_method'] == 'conditional_is':
                    fit_model.fit_conditional(sub, T = params['is_T'],
                                              verbose = params['verbose'])
                elif params['fit_method'] == 'c_conditional':
                    fit_model.fit_c_conditional(sub,
                                                verbose = params['verbose'])
                elif params['fit_method'] == 'composite':
                    fit_model.fit_composite(sub, T = 100,
                                            verbose = params['verbose'])
                elif params['fit_method'] == 'brazzale':
                    fit_model.fit_brazzale(sub)
                elif params['fit_method'] == 'saddlepoint':
                    fit_model.fit_saddlepoint(sub)
                elif params['fit_method'] == 'none':
                    pass

                if params['post_fit']:
                    sub.offset_extremes()
                    fit_model.fit_convex_opt(sub, fix_beta = True)

                if params['dump_fits']:
                    fit = {}
                    fit['theta'] = pick(fit_model.beta)
                    if 'alpha_out' in sub.row_covariates:
                        fit['alpha'] = pick(sub.row_covariates['alpha_out'])
                    if 'alpha_in' in sub.row_covariates:
                        fit['beta'] = pick(sub.col_covariates['alpha_in'])
                    if not fit_model.kappa is None:
                        fit['kappa'] = fit_model.kappa
                    if not sub.offset is None:
                        sub.offset.dirty()
                        fit['offset'] = pick(sub.offset)
                    if not fit_model.fit_info is None:
                        fit['fit_info'] = pick(fit_model.fit_info)

                    fits.append(fit)

            if params['find_good'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err < params['find_good']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T = 1000,
                                              verbose = True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('goodmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, { 'Y': Y, 'X': X })
                    sys.exit()

            if params['find_bad'] > 0:
                abs_err = abs(fit_model.beta['x_0'] - data_model.beta['x_0'])
                if abs_err > params['find_bad']:
                    print abs_err

                    sub.offset = None
                    fit_model.fit_conditional(sub, T = 1000,
                                              verbose = True)
                    print fit_model.beta['x_0']
                    print fit_model.fit_info

                    f = file('badmat.mat', 'wb')
                    import scipy.io
                    Y = np.array(sub.as_dense(), dtype=np.float)
                    X = sub.edge_covariates['x_0'].matrix()
                    scipy.io.savemat(f, { 'Y': Y, 'X': X })
                    sys.exit()

            results.record(sub_size, rep, sub, data_model, fit_model)

            if params['verbose']:
                print

    if params['dump_fits']:
        with open(params['dump_fits'], 'w') as outfile:
            json.dump(([(p, pick(params[p])) for p in params], fits), outfile)

    # Compute beta MSEs
    covariate_naming = []
    for c in covariates:
        mse_name = 'MSE(theta_{%s})' % c
        true_name = 'True theta_{%s}' % c
        est_name = 'Est. theta_{%s}' % c
        results.estimate_mse(mse_name, true_name, est_name)
        covariate_naming.append((c, mse_name, true_name, est_name))

    # Report parameters for the run
    print 'Parameters:'
    for field in params:
        print '%s: %s' % (field, str(params[field]))

    # Should not vary between runs with the same seed and same number
    # of arrays tested
    seed.final()

    results.summary()

    return results, covariate_naming
# Daniel Klein, 10/26/2012

import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

from Network import Network
from Models import StationaryLogistic, NonstationaryLogistic
from Models import FixedMargins, alpha_norm
from BinaryMatrix import approximate_conditional_nll
from BinaryMatrix import approximate_from_margins_weights
from BinaryMatrix import log_partition_is
from Utility import init_latex_rendering
from Experiment import Seed

# Fixed seed so repeated runs regenerate the same synthetic network
seed = Seed(3)

# Enable LaTeX text rendering in matplotlib figures (project utility)
init_latex_rendering()

# Parameters
N = 25                                  # number of nodes in the network
G = 20                                  # presumably a grid/replication count -- used below, confirm downstream
alpha_sd = 2.0                          # SD for node-level propensities (alpha_norm below)
theta_true = {'x_1': 2.0, 'x_2': -1.0}  # true covariate coefficients for the data model
target_degree = 2                       # presumably controls network density -- confirm downstream

# Setup network
net = Network(N)                        # full N-node network
alpha_norm(net, alpha_sd)               # attach normal(0, alpha_sd) propensities

# Setup data model and network covariates