# Parallel synthetic-data test scripts. Standard-library and numpy imports
# are collected here; the harness, model, and inference utilities
# (initialize_parallel_test_harness, make_model, Population, convert_model,
# parallel_ais, parallel_gibbs_sample, parallel_coord_descent, etc.) come
# from the surrounding package.
import copy
import cPickle
import os
import time

import numpy as np


def run_synth_test():
    """ Run a test with synthetic data and MCMC inference. """
    options, popn, data, client, popn_true, x_true = \
        initialize_parallel_test_harness()

    # If an initial state was specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0,
                                   popn, popn.model, x0)

    use_existing = False
    fname = os.path.join(options.resultsDir,
                         '%s_marginal_lkhd.pkl' % options.model)
    if use_existing and os.path.exists(fname):
        print "Found existing results"
        with open(fname) as f:
            marg_lkhd = cPickle.load(f)
    else:
        N_samples = 10
        popn_true.set_data(data)

        # Estimate the marginal log likelihood
        print "Performing parallel inference"
        marg_lkhd, log_weights = parallel_ais(client, data,
                                              x0=x0,
                                              N_samples=N_samples,
                                              steps_per_B=50,
                                              resdir=options.resultsDir)

        # Save results
        print "Saving results to %s" % fname
        with open(fname, 'w') as f:
            cPickle.dump((marg_lkhd, log_weights), f, protocol=-1)
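# For reference, annealed importance sampling estimates the marginal
# likelihood by averaging importance weights in log space. The helper below
# is a hypothetical sketch, not part of this repo (parallel_ais presumably
# performs an equivalent reduction internally); it shows the standard
# numerically stable way to combine the returned log_weights.
def combine_ais_weights(log_weights):
    """Reduce AIS log weights to a log marginal likelihood estimate,
    log( (1/S) * sum_s exp(log_w_s) ), via a stable log-sum-exp."""
    log_weights = np.asarray(log_weights)
    m = log_weights.max()
    # Subtract the max before exponentiating to avoid overflow
    return m + np.log(np.mean(np.exp(log_weights - m)))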
def run_synth_test():
    """ Run a test with synthetic data and MAP inference via parallel
    coordinate descent.
    """
    options, popn, data, client, popn_true, x_true = \
        initialize_parallel_test_harness()

    print "Performing parallel inference"
    x_inf = parallel_coord_descent(client, data['N'], maxiter=1)
    ll_inf = popn.compute_log_p(x_inf)
    print "LL_inf: %f" % ll_inf

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
        cPickle.dump(x_inf, f, protocol=-1)

    # Plot results
    plot_results(popn, x_inf,
                 popn_true, x_true,
                 do_plot_imp_responses=False,
                 resdir=options.resultsDir)
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference. """
    options, popn, data, client, popn_true, x_true = \
        initialize_parallel_test_harness()

    raise Exception("Make sure the sparsity is set properly!")

    # If an initial state was specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample(None)
                x0 = convert_model(mle_popn, mle_model, mle_x0,
                                   popn, popn.model, x0)

    # DEBUG: Initialize with the true variables
    # import copy
    # x0 = copy.deepcopy(x_true)

    use_existing = False
    if use_existing and \
       os.path.exists(os.path.join(options.resultsDir, 'results.pkl')):
        print "Found existing results"
        with open(os.path.join(options.resultsDir, 'results.pkl')) as f:
            x_smpls = cPickle.load(f)
        N_samples = len(x_smpls)
    else:
        N_samples = 1000

        # Create synthetic test data on which to evaluate
        # predictive log likelihoods
        print "Creating synthetic test data"
        T_test = 15
        popn_test = Population(popn.model)
        test_data = gen_synth_data(data['N'], T_test, popn_true, x_true)
        popn_test.set_data(test_data)

        # Compute the predictive log likelihood under the true model
        popn_true.set_data(test_data)
        x_true['predll'] = popn_true.compute_ll(x_true)
        popn_true.set_data(data)

        # Compute the predictive log likelihood under a homogeneous PP model with MLE
        # homog_pred_lls[j] = compute_homog_pp(train_data, test_data)

        # Callback to evaluate the predictive log likelihood of each sample;
        # the list `smpl` is a mutable counter shared with the closure
        pred_lls = np.zeros(N_samples)
        smpl = [0]
        def pred_ll_cbk(x):
            pred_ll = popn_test.compute_ll(x)
            pred_lls[smpl[0]] = pred_ll
            x['predll'] = pred_ll
            smpl[0] += 1
            print "Pred LL: %.2f" % pred_ll
        pred_ll_cbk = None  # NOTE: the callback is currently disabled

        # Perform inference
        print "Performing parallel inference"
        start_time = time.time()
        x_smpls = parallel_gibbs_sample(client, data,
                                        x0=x0,
                                        N_samples=N_samples,
                                        save_interval=50,
                                        results_dir=options.resultsDir,
                                        callback=pred_ll_cbk)
        stop_time = time.time()

        # Save results
        print "Saving results to %s" % os.path.join(options.resultsDir, 'results.pkl')
        with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
            cPickle.dump(x_smpls, f, protocol=-1)

        # Save runtime
        with open(os.path.join(options.resultsDir, 'runtime.pkl'), 'w') as f:
            cPickle.dump(stop_time - start_time, f, protocol=-1)

    # Plot the average of the last fraction of samples (here, all of them)
    print "Plotting results"
    smpl_frac = 1.0

    # Only plot the impulse response matrix for small N
    do_plot = data['N'] < 20
    do_plot_imp_responses = data['N'] < 30

    if do_plot:
        plot_results(popn,
                     x_smpls[-1 * int(smpl_frac * len(x_smpls)):],
                     popn_true, x_true,
                     do_plot_imp_responses=do_plot_imp_responses,
                     resdir=options.resultsDir)
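# Since pred_ll_cbk is disabled before sampling above, the predictive log
# likelihoods can instead be computed after the fact from the saved samples.
# A minimal post-hoc sketch, assuming a popn_test and x_smpls as constructed
# in the function above (this helper is illustrative, not part of the repo):
def compute_pred_lls(popn_test, x_smpls):
    """Evaluate the held-out log likelihood of each saved sample."""
    return np.array([popn_test.compute_ll(x) for x in x_smpls])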
def run_parallel_map():
    """ Run a test with synthetic data and MAP inference via parallel
    coordinate descent, cross validating over a list of models.
    """
    options, popn, data, client, popn_true, x_true = \
        initialize_parallel_test_harness()

    # Get the list of models for cross validation
    base_model = make_model(options.model, N=data['N'])
    models = get_xv_models(base_model)

    # Segment data into training and cross validation sets
    train_frac = 0.75
    T_split = data['T'] * train_frac
    train_data = segment_data(data, (0, T_split))
    xv_data = segment_data(data, (T_split, data['T']))

    # Sample random initial state
    x0 = popn.sample(None)

    # Track the best model and parameters
    best_ind = -1
    best_xv_ll = -np.Inf
    best_x = x0
    best_model = None

    use_existing = False

    start_time = time.clock()

    # Fit each model, warm starting from the optimum of the previous models
    train_lls = np.zeros(len(models))
    xv_lls = np.zeros(len(models))
    total_lls = np.zeros(len(models))
    for (i, model) in enumerate(models):
        print "Evaluating model %d" % i
        set_hyperparameters_on_engines(client[:], model)
        add_data_on_engines(client[:], train_data)

        partial_fname = os.path.join(options.resultsDir,
                                     'results.partial.%d.pkl' % i)
        if use_existing and os.path.exists(partial_fname):
            print "Found existing results for model %d" % i
            with open(partial_fname) as f:
                (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
            train_lls[i] = ll_train
            xv_lls[i] = ll_xv
            total_lls[i] = ll_total
        else:
            x0 = copy.deepcopy(best_x)
            # set_data_on_engines(client[:], train_data)
            ll0 = parallel_compute_ll(client[:], x0, data['N'])
            print "Training LL0: %f" % ll0

            # Perform inference
            x_inf = parallel_coord_descent(client, data['N'],
                                           x0=x0,
                                           maxiter=1,
                                           use_hessian=False,
                                           use_rop=False)

            ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Training LL_inf: %f" % ll_train
            train_lls[i] = ll_train

            # Compute the log likelihood on the cross validation data
            add_data_on_engines(client[:], xv_data)
            ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Cross Validation LL: %f" % ll_xv
            xv_lls[i] = ll_xv

            # Compute the log likelihood on the full dataset
            add_data_on_engines(client[:], data)
            ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
            print "Total LL: %f" % ll_total
            total_lls[i] = ll_total

            print "Saving partial results"
            with open(partial_fname, 'w') as f:
                cPickle.dump((x_inf, ll_train, ll_xv, ll_total), f, protocol=-1)

        # Update best model
        if ll_xv > best_xv_ll:
            best_ind = i
            best_xv_ll = ll_xv
            best_x = copy.deepcopy(x_inf)
            best_model = copy.deepcopy(model)

    print "Training the best model (%d) with the full dataset" % best_ind
    # Set the best hyperparameters
    set_hyperparameters_on_engines(client[:], best_model)
    add_data_on_engines(client[:], data)

    # Fit the best model on the full dataset
    best_x = parallel_coord_descent(client, data['N'],
                                    x0=best_x,
                                    maxiter=1,
                                    use_hessian=False,
                                    use_rop=False)

    # Print results summary
    for i in np.arange(len(models)):
        print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % \
              (i, train_lls[i], xv_lls[i], total_lls[i])
    print "Best model: %d" % best_ind
    print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x, data['N'])
    print "True LL: %f" % popn_true.compute_ll(x_true)

    stop_time = time.clock()

    # Save results
    with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
        cPickle.dump(best_x, f, protocol=-1)

    # Save runtime
    with open(os.path.join(options.resultsDir, 'runtime.pkl'), 'w') as f:
        cPickle.dump(stop_time - start_time, f, protocol=-1)
def run_synth_test():
    """ Run a test with synthetic data and MCMC inference. """
    options, popn, data, client, popn_true, x_true = \
        initialize_parallel_test_harness()

    # If an initial state was specified, load x0 from file
    x0 = None
    if options.x0_file is not None:
        with open(options.x0_file, 'r') as f:
            print "Initializing with state from: %s" % options.x0_file
            prev_x0 = cPickle.load(f)
            if isinstance(prev_x0, list):
                x0 = prev_x0[-1]
            else:
                mle_x0 = prev_x0
                # HACK: We're assuming x0 came from a standard GLM
                mle_model = make_model('standard_glm', N=data['N'])
                mle_popn = Population(mle_model)
                mle_popn.set_data(data)

                x0 = popn.sample()
                x0 = convert_model(mle_popn, mle_model, mle_x0,
                                   popn, popn.model, x0)

    use_existing = False
    if use_existing and \
       os.path.exists(os.path.join(options.resultsDir, 'results.pkl')):
        print "Found existing results"
        with open(os.path.join(options.resultsDir, 'results.pkl')) as f:
            x_smpls = cPickle.load(f)
        N_samples = len(x_smpls)
    else:
        # Perform inference
        print "Performing parallel inference"
        N_samples = 1000
        x_smpls = parallel_gibbs_sample(client, data,
                                        x0=x0,
                                        N_samples=N_samples,
                                        save_interval=50,
                                        results_dir=options.resultsDir)

        # Save results
        print "Saving results to %s" % os.path.join(options.resultsDir, 'results.pkl')
        with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
            cPickle.dump(x_smpls, f, protocol=-1)

    # Plot the average of the last half of the samples
    print "Plotting results"
    smpl_frac = 0.5

    # Only plot the impulse response matrix for small N
    do_plot = data['N'] < 20
    do_plot_imp_responses = data['N'] < 30

    if do_plot:
        plot_results(popn,
                     x_smpls[-1 * int(smpl_frac * N_samples):],
                     popn_true, x_true,
                     do_plot_imp_responses=do_plot_imp_responses,
                     resdir=options.resultsDir)
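# These tests read their configuration (options.resultsDir, options.model,
# options.x0_file) via the option parsing inside
# initialize_parallel_test_harness(), so a script needs no further setup.
# A typical entry point might look like the following; the guard below is a
# hypothetical sketch, not part of the original scripts.
if __name__ == '__main__':
    run_synth_test()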