Example #1
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn,
                                   popn.model, x0)

    use_existing = False

    fname = os.path.join(options.resultsDir,
                         '%s_marginal_lkhd.pkl' % options.model)
    if use_existing and os.path.exists(fname):
        print "Found existing results"
        with open(fname) as f:
            marg_lkhd = cPickle.load(f)
    else:
        N_samples = 10
        popn_true.set_data(data)

        # Estimate the marginal log likelihood
        print "Performing parallel inference"
        marg_lkhd, log_weights = parallel_ais(client,
                                              data,
                                              x0=x0,
                                              N_samples=N_samples,
                                              steps_per_B=50,
                                              resdir=options.resultsDir)

        # Save results
        print "Saving results to %s" % fname
        with open(fname, 'w') as f:
            cPickle.dump((marg_lkhd, log_weights), f, protocol=-1)
Example #2
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, "r") as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model("standard_glm", N=data["N"])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn, popn.model, x0)

    use_existing = False

    fname = os.path.join(options.resultsDir, "%s_marginal_lkhd.pkl" % options.model)
    if use_existing and os.path.exists(fname):
        print "Found existing results"
        with open(fname) as f:
            marg_lkhd = cPickle.load(f)
    else:
        N_samples = 10
        popn_true.set_data(data)

        # Estimate the marginal log likelihood
        print "Performing parallel inference"
        marg_lkhd, log_weights = parallel_ais(
            client, data, x0=x0, N_samples=N_samples, steps_per_B=50, resdir=options.resultsDir
        )

        # Save results
        print "Saving results to %s" % fname
        with open(fname, "w") as f:
            cPickle.dump((marg_lkhd, log_weights), f, protocol=-1)
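
A note on the call above: parallel_ais estimates the marginal log likelihood with annealed importance sampling (AIS). The standalone sketch below shows the core AIS computation on a toy conjugate Gaussian model (prior N(0, 1), one observation y with noise variance 0.25); the toy model, proposal width, and temperature schedule are assumptions for illustration, not the library's implementation.

import numpy as np

def toy_ais(y=1.0, N_samples=10, n_betas=50, seed=0):
    # Anneal from the prior (beta = 0) to the posterior (beta = 1),
    # accumulating importance weights whose average estimates the marginal likelihood.
    rng = np.random.RandomState(seed)
    betas = np.linspace(0.0, 1.0, n_betas)
    log_prior = lambda x: -0.5 * x ** 2 - 0.5 * np.log(2 * np.pi)
    log_lkhd = lambda x: -0.5 * (y - x) ** 2 / 0.25 - 0.5 * np.log(2 * np.pi * 0.25)
    log_weights = np.zeros(N_samples)
    for s in range(N_samples):
        x = rng.randn()                              # exact draw from the prior
        for b0, b1 in zip(betas[:-1], betas[1:]):
            log_weights[s] += (b1 - b0) * log_lkhd(x)
            # One Metropolis step leaving prior(x) * lkhd(x)**b1 invariant
            x_prop = x + 0.5 * rng.randn()
            log_a = (log_prior(x_prop) + b1 * log_lkhd(x_prop)
                     - log_prior(x) - b1 * log_lkhd(x))
            if np.log(rng.rand()) < log_a:
                x = x_prop
    # Log of the average importance weight, stabilized with log-sum-exp
    marg_lkhd = np.log(np.mean(np.exp(log_weights - log_weights.max()))) + log_weights.max()
    return marg_lkhd, log_weights

marg_lkhd, log_weights = toy_ais()
print("AIS estimate of the log marginal: %.3f" % marg_lkhd)
print("Exact log marginal:               %.3f" % (-0.5 * np.log(2 * np.pi * 1.25) - 0.5 / 1.25))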
Example #3
def run_synth_test():
    """ Run a test with synthetic data and MAP inference via
        parallel coordinate descent.
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    print "Performing parallel inference"
    x_inf = parallel_coord_descent(client, data['N'], maxiter=1)
    ll_inf = popn.compute_log_p(x_inf)
    print "LL_inf: %f" % ll_inf

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
        cPickle.dump(x_inf,f, protocol=-1)
    
    # Plot results
    plot_results(popn, x_inf, 
                 popn_true, x_true, 
                 do_plot_imp_responses=False,
                 resdir=options.resultsDir)
Example #4
def run_synth_test():
    """ Run a test with synthetic data and MAP inference via
        parallel coordinate descent.
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    print "Performing parallel inference"
    x_inf = parallel_coord_descent(client, data['N'], maxiter=1)
    ll_inf = popn.compute_log_p(x_inf)
    print "LL_inf: %f" % ll_inf

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
        cPickle.dump(x_inf, f, protocol=-1)

    # Plot results
    plot_results(popn,
                 x_inf,
                 popn_true,
                 x_true,
                 do_plot_imp_responses=False,
                 resdir=options.resultsDir)
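
For reference, parallel_coord_descent above performs MAP estimation by repeatedly optimizing one block of variables while the others are held fixed. The standalone sketch below illustrates plain coordinate descent on a toy strictly convex quadratic; the objective and iteration count are assumptions for illustration, not the library's implementation.

import numpy as np

def toy_coord_descent(A, b, maxiter=1):
    # Minimize 0.5 * x'Ax - b'x by cycling over coordinates;
    # each coordinate update is the exact minimizer with the rest of x fixed.
    x = np.zeros_like(b)
    for _ in range(maxiter):
        for i in range(len(b)):
            x[i] = (b[i] - A[i].dot(x) + A[i, i] * x[i]) / A[i, i]
    return x

A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, 1.0])
x_map = toy_coord_descent(A, b, maxiter=25)
print("Coordinate descent solution: %s" % x_map)
print("Direct solve:                %s" % np.linalg.solve(A, b))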
Example #5
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    raise Exception("Make sure the sparsity is set properly!")

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn, popn.model, x0)

    # !!!!DEBUG!!!!!
    # Initialize with true variables
    # import copy
    # x0 = copy.deepcopy(x_true)

    use_existing = False
    
    if use_existing and \
       os.path.exists(os.path.join(options.resultsDir, 'results.pkl')):
        print "Found existing results"
        with open(os.path.join(options.resultsDir, 'results.pkl')) as f:
            x_smpls = cPickle.load(f)
            N_samples = len(x_smpls)
    else:
        N_samples = 1000

        # Define a callback to evaluate log likelihoods and predictive log likelihoods
        print "Creating synthetic test data"
        T_test = 15
        popn_test = Population(popn.model)
        test_data = gen_synth_data(data['N'], T_test, popn_true, x_true)
        popn_test.set_data(test_data)

        # Compute pred ll under true model
        popn_true.set_data(test_data)
        x_true['predll'] = popn_true.compute_ll(x_true)
        popn_true.set_data(data)

        # Compute the predictive log likelihood under a homogeneous PP model with MLE
        # homog_pred_lls[j] = compute_homog_pp(train_data, test_data)

        pred_lls = np.zeros(N_samples)
        smpl = [0]      # one-element list: a mutable counter the closure below can update
        def pred_ll_cbk(x):
            pred_ll = popn_test.compute_ll(x)
            pred_lls[smpl[0]] = pred_ll
            x['predll'] = pred_ll
            smpl[0] += 1
            print "Pred LL: %.2f" % pred_ll
        pred_ll_cbk = None  # NOTE: overrides the callback defined above, disabling per-sample predictive LLs

        # Perform inference
        print "Performing parallel inference"
        start_time = time.time()
        x_smpls = parallel_gibbs_sample(client, data,
                                        x0=x0, N_samples=N_samples,
                                        save_interval=50, results_dir=options.resultsDir,
                                        callback=pred_ll_cbk)
        stop_time = time.time()

        # Save results
        print "Saving results to %s" % os.path.join(options.resultsDir, 'results.pkl')
        with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
            cPickle.dump(x_smpls, f, protocol=-1)

        # Save runtime
        with open(os.path.join(options.resultsDir, 'runtime.pkl'),'w') as f:
            cPickle.dump(stop_time-start_time, f, protocol=-1)


    # Plot the average of the last smpl_frac of samples (here, all of them)
    print "Plotting results"
    smpl_frac = 1.0

    # Only plot the impulse response matrix for small N
    do_plot = data['N'] < 20
    do_plot_imp_responses = data['N'] < 30

    if do_plot:
        plot_results(popn,
                    x_smpls[-1*int(smpl_frac*len(x_smpls)):],
                    popn_true,
                    x_true,
                    do_plot_imp_responses=do_plot_imp_responses,
                    resdir=options.resultsDir)
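
A note on the callback in this example: Python 2 closures cannot rebind an integer in the enclosing scope, so smpl = [0] serves as a mutable counter. The sketch below isolates that pattern; toy_sampler is a hypothetical stand-in for parallel_gibbs_sample, and the quadratic stand-in replaces popn_test.compute_ll.

import numpy as np

def toy_sampler(N_samples, callback=None):
    # Hypothetical stand-in for parallel_gibbs_sample: draw toy states
    # and invoke the callback on each one as it is produced.
    rng = np.random.RandomState(0)
    x_smpls = []
    for _ in range(N_samples):
        x = {'w': rng.randn()}
        if callback is not None:
            callback(x)
        x_smpls.append(x)
    return x_smpls

N_samples = 5
pred_lls = np.zeros(N_samples)
smpl = [0]   # one-element list: the closure can mutate its contents
def pred_ll_cbk(x):
    pred_ll = -0.5 * x['w'] ** 2     # stand-in for popn_test.compute_ll(x)
    pred_lls[smpl[0]] = pred_ll
    smpl[0] += 1

x_smpls = toy_sampler(N_samples, callback=pred_ll_cbk)
print("Per-sample predictive LLs: %s" % pred_lls)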
Example #6
def run_parallel_map():
    """ Run a test with synthetic data and MAP inference via
        parallel coordinate descent, with cross-validation over models.
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0,T_split))
    xv_data = segment_data(data, (T_split,data['T']))

    # Sample random initial state
    x0 = popn.sample(None)

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    use_existing = False

    start_time = time.clock()

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i,model) in enumerate(models):
        print "Evaluating model %d" % i
        set_hyperparameters_on_engines(client[:], model)
        add_data_on_engines(client[:], train_data)

        if use_existing and  \
           os.path.exists(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)):
            print "Found existing results for model %d" % i
            with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)) as f:
                (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
                train_lls[i] = ll_train
                xv_lls[i] = ll_xv
                total_lls[i] = ll_total

        else:
            x0 = copy.deepcopy(best_x)
            # set_data_on_engines(client[:], train_data)
            ll0 = parallel_compute_ll(client[:], x0, data['N'])
            print "Training LL0: %f" % ll0

            # Perform inference
            x_inf = parallel_coord_descent(client, data['N'], x0=x0, maxiter=1,
                                           use_hessian=False,
                                           use_rop=False)

            ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Training LL_inf: %f" % ll_train
            train_lls[i] = ll_train

            # Compute log lkhd on xv data
            add_data_on_engines(client[:], xv_data)
            ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Cross Validation LL: %f" % ll_xv
            xv_lls[i] = ll_xv

            # Compute log lkhd on total dataset
            add_data_on_engines(client[:], data)
            ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Total LL: %f" % ll_total
            total_lls[i] = ll_total

            print "Saving partial results"
            with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i),'w') as f:
                cPickle.dump((x_inf, ll_train, ll_xv, ll_total), f, protocol=-1)

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    print "Training the best model (%d) with the full dataset" % best_ind
    # Set the best hyperparameters
    set_hyperparameters_on_engines(client[:], best_model)
    add_data_on_engines(client[:], data)

    # Fit the best model on the full training data
    best_x = parallel_coord_descent(client, data['N'], x0=best_x, maxiter=1,
                                    use_hessian=False,
                                    use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x, data['N'])
    print "True LL: %f" % popn_true.compute_ll(x_true)


    stop_time = time.clock()

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
        cPickle.dump(best_x, f, protocol=-1)

    # Save runtime
    with open(os.path.join(options.resultsDir, 'runtime.pkl'),'w') as f:
        cPickle.dump(stop_time-start_time, f, protocol=-1)
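
For reference, segment_data above splits the recording by time into a training window and a held-out cross-validation window. The sketch below shows one way such a split could look on a simplified data dict with a time-by-neuron spike matrix 'S'; the field names and unit bin width are assumptions for illustration, not the library's actual data layout.

import numpy as np

def toy_segment_data(data, window, dt=1.0):
    # Slice the spike matrix to the requested time window and update 'T';
    # a shallow copy keeps 'N' and any other metadata.
    t_start, t_stop = window
    i0, i1 = int(t_start / dt), int(t_stop / dt)
    seg = dict(data)
    seg['S'] = data['S'][i0:i1, :]
    seg['T'] = t_stop - t_start
    return seg

data = {'N': 3, 'T': 100.0, 'S': np.random.poisson(0.1, size=(100, 3))}
train_frac = 0.75
T_split = data['T'] * train_frac
train_data = toy_segment_data(data, (0, T_split))
xv_data = toy_segment_data(data, (T_split, data['T']))
print("Train bins: %d   XV bins: %d" % (train_data['S'].shape[0], xv_data['S'].shape[0]))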
Example #7
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # If x0 specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample()
                x0 = convert_model(mle_popn, mle_model, mle_x0, popn, popn.model, x0)
    
    use_existing = False
    
    if use_existing and \
       os.path.exists(os.path.join(options.resultsDir, 'results.pkl')):
        print "Found existing results"
        with open(os.path.join(options.resultsDir, 'results.pkl')) as f:
            x_smpls = cPickle.load(f)
            N_samples = len(x_smpls)
    else:
        # Perform inference
        print "Performing parallel inference"
        N_samples = 1000
        x_smpls = parallel_gibbs_sample(client, data,
                                        x0=x0, N_samples=N_samples,
                                        save_interval=50, results_dir=options.resultsDir)
        
        # Save results
        print "Saving results to %s" % os.path.join(options.resultsDir, 'results.pkl')
        with open(os.path.join(options.resultsDir, 'results.pkl'),'w') as f:
            cPickle.dump(x_smpls, f, protocol=-1)

    # Plot the average of the last smpl_frac of samples (here, the last half)
    print "Plotting results"
    smpl_frac = 0.5

    # Only plot the impulse response matrix for small N
    do_plot = data['N'] < 20
    do_plot_imp_responses = data['N'] < 30

    if do_plot:
        plot_results(popn,
                    x_smpls[-1*int(smpl_frac*N_samples):],
                    popn_true,
                    x_true,
                    do_plot_imp_responses=do_plot_imp_responses,
                    resdir=options.resultsDir)
Example #8
def run_parallel_map():
    """ Run a test with synthetic data and MAP inference via
        parallel coordinate descent, with cross-validation over models.
    """
    options, popn, data, client, popn_true, x_true = initialize_parallel_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0, T_split))
    xv_data = segment_data(data, (T_split, data['T']))

    # Sample random initial state
    x0 = popn.sample(None)

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    use_existing = False

    start_time = time.clock()

    # Fit each model using the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i, model) in enumerate(models):
        print "Evaluating model %d" % i
        set_hyperparameters_on_engines(client[:], model)
        add_data_on_engines(client[:], train_data)

        if use_existing and  \
           os.path.exists(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)):
            print "Found existing results for model %d" % i
            with open(
                    os.path.join(options.resultsDir,
                                 'results.partial.%d.pkl' % i)) as f:
                (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
                train_lls[i] = ll_train
                xv_lls[i] = ll_xv
                total_lls[i] = ll_total

        else:
            x0 = copy.deepcopy(best_x)
            # set_data_on_engines(client[:], train_data)
            ll0 = parallel_compute_ll(client[:], x0, data['N'])
            print "Training LL0: %f" % ll0

            # Perform inference
            x_inf = parallel_coord_descent(client,
                                           data['N'],
                                           x0=x0,
                                           maxiter=1,
                                           use_hessian=False,
                                           use_rop=False)

            ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Training LL_inf: %f" % ll_train
            train_lls[i] = ll_train

            # Compute log lkhd on xv data
            add_data_on_engines(client[:], xv_data)
            ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Cross Validation LL: %f" % ll_xv
            xv_lls[i] = ll_xv

            # Compute log lkhd on total dataset
            add_data_on_engines(client[:], data)
            ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Total LL: %f" % ll_total
            total_lls[i] = ll_total

            print "Saving partial results"
            with open(
                    os.path.join(options.resultsDir,
                                 'results.partial.%d.pkl' % i), 'w') as f:
                cPickle.dump((x_inf, ll_train, ll_xv, ll_total),
                             f,
                             protocol=-1)

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    print "Training the best model (%d) with the full dataset" % best_ind
    # Set the best hyperparameters
    set_hyperparameters_on_engines(client[:], best_model)
    add_data_on_engines(client[:], data)

    # Fit the best model on the full training data
    best_x = parallel_coord_descent(client,
                                    data['N'],
                                    x0=best_x,
                                    maxiter=1,
                                    use_hessian=False,
                                    use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (
            i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x,
                                                    data['N'])
    print "True LL: %f" % popn_true.compute_ll(x_true)

    stop_time = time.clock()

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
        cPickle.dump(best_x, f, protocol=-1)

    # Save runtime
    with open(os.path.join(options.resultsDir, 'runtime.pkl'), 'w') as f:
        cPickle.dump(stop_time - start_time, f, protocol=-1)