def run_comparison(data_path, test_path, output_path, T_train=None, seed=None):
    """
    Run the comparison on the given data file.

    :param data_path:   path to the pickled training data
    :param test_path:   path to the pickled test data
    :param output_path: prefix for the result files
    :param T_train:     optional number of time bins to train on
    :param seed:        random seed (drawn at random if None)
    """
    if seed is None:
        seed = np.random.randint(2**32)
    print("Setting seed to ", seed)
    np.random.seed(seed)

    assert os.path.exists(os.path.dirname(output_path)), \
        "Output directory does not exist!"

    # Load the training data (pickles must be opened in binary mode)
    if data_path.endswith(".gz"):
        with gzip.open(data_path, 'rb') as f:
            S, true_model = pickle.load(f)
    else:
        with open(data_path, 'rb') as f:
            S, true_model = pickle.load(f)

    # If T_train is given, only use a fraction of the dataset
    if T_train is not None:
        S = S[:T_train, :]

    # Load the test data
    if test_path.endswith(".gz"):
        with gzip.open(test_path, 'rb') as f:
            S_test, test_model = pickle.load(f)
    else:
        with open(test_path, 'rb') as f:
            S_test, test_model = pickle.load(f)

    K = true_model.K
    C = true_model.C
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    use_parse_results = True
    if use_parse_results and os.path.exists(output_path + ".parsed_results.pkl"):
        with open(output_path + ".parsed_results.pkl", 'rb') as f:
            auc_rocs, auc_prcs, plls, timestamps = pickle.load(f)
        timestamps['svi'] = np.array(timestamps['svi'])
    else:
        # Compute the cross correlation to estimate the connectivity
        W_xcorr = infer_net_from_xcorr(S, dtmax=true_model.dt_max // true_model.dt)

        # Fit a standard Hawkes model on a subset of the data with BFGS
        bfgs_model, bfgs_time = fit_standard_hawkes_model_bfgs(
            S, K, B, dt, dt_max, output_path=output_path)

        # Fit a standard Hawkes model with SGD
        # standard_models, timestamps = fit_standard_hawkes_model_sgd(
        #     S, K, B, dt, dt_max, init_model=init_model)
        #
        # # Save the models
        # with open(output_path + ".sgd.pkl", 'wb') as f:
        #     print("Saving SGD results to ", (output_path + ".sgd.pkl"))
        #     pickle.dump((standard_models, timestamps), f, protocol=-1)

        # Fit a network Hawkes model with Gibbs
        gibbs_samples, gibbs_timestamps = fit_network_hawkes_gibbs(
            S, K, C, B, dt, dt_max,
            output_path=output_path, standard_model=bfgs_model)

        # Fit a spike-and-slab network Hawkes model with Gibbs
        gibbs_ss_samples = gibbs_ss_timestamps = None
        # gibbs_ss_samples, gibbs_ss_timestamps = fit_network_hawkes_gibbs_ss(
        #     S, K, C, B, dt, dt_max,
        #     output_path=output_path, standard_model=bfgs_model)

        # Fit a network Hawkes model with batch VB
        vb_models, vb_timestamps = fit_network_hawkes_vb(
            S, K, C, B, dt, dt_max,
            output_path=output_path, standard_model=bfgs_model)

        # Fit a network Hawkes model with SVI
        # svi_models = svi_timestamps = None
        svi_models, svi_timestamps = fit_network_hawkes_svi(
            S, K, C, B, dt, dt_max, output_path,
            standard_model=bfgs_model)

        # Combine the timestamps into a dict
        timestamps = {
            'bfgs': bfgs_time,
            'gibbs': gibbs_timestamps,
            'gibbs_ss': gibbs_ss_timestamps,
            'svi': svi_timestamps,
            'vb': vb_timestamps,
        }

        # Compute the adjusted mutual information of the inferred clusterings
        amis = compute_clustering_score(true_model,
                                        bfgs_model=bfgs_model,
                                        gibbs_samples=gibbs_samples,
                                        gibbs_ss_samples=gibbs_ss_samples,
                                        svi_models=svi_models,
                                        vb_models=vb_models)
        print("AMIs")
        pprint.pprint(amis)

        # Compute the area under the ROC curve of the inferred network
        auc_rocs = compute_auc(true_model,
                               W_xcorr=W_xcorr,
                               bfgs_model=bfgs_model,
                               gibbs_samples=gibbs_samples,
                               gibbs_ss_samples=gibbs_ss_samples,
                               svi_models=svi_models,
                               vb_models=vb_models)
        print("AUC-ROC")
        pprint.pprint(auc_rocs)

        # Compute the area under the precision-recall curve of the inferred network
        auc_prcs = compute_auc_prc(true_model,
                                   W_xcorr=W_xcorr,
                                   bfgs_model=bfgs_model,
                                   gibbs_samples=gibbs_samples,
                                   gibbs_ss_samples=gibbs_ss_samples,
                                   svi_models=svi_models,
                                   vb_models=vb_models)
        print("AUC-PRC")
        pprint.pprint(auc_prcs)

        # Compute the predictive log likelihoods on held-out data
        plls = compute_predictive_ll(S_test, S,
                                     true_model=true_model,
                                     bfgs_model=bfgs_model,
                                     gibbs_samples=gibbs_samples,
                                     gibbs_ss_samples=gibbs_ss_samples,
                                     svi_models=svi_models,
                                     vb_models=vb_models)

        # Save the parsed results (binary mode for pickle)
        with open(output_path + ".parsed_results.pkl", 'wb') as f:
            print("Saving parsed results to ", output_path + ".parsed_results.pkl")
            pickle.dump((auc_rocs, auc_prcs, plls, timestamps), f, protocol=-1)

    plot_pred_ll_vs_time(plls, timestamps, Z=float(S.size), T_train=T_train)
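# -----------------------------------------------------------------------------
# infer_net_from_xcorr is called above but defined elsewhere. Below is a
# minimal, hypothetical sketch of what cross-correlation-based connectivity
# scoring might look like (assumed behavior for illustration; the actual
# implementation may differ): score each directed pair (i, j) by the largest
# correlation between neuron i's counts at time t and neuron j's counts at
# time t + lag, over lags 1..dtmax. The helper name is made up.
# -----------------------------------------------------------------------------
import numpy as np

def xcorr_connectivity_sketch(S, dtmax):
    """Return a K x K matrix of lagged cross-correlation scores for S (T x K)."""
    T, K = S.shape
    Sc = (S - S.mean(axis=0)) / (S.std(axis=0) + 1e-8)  # standardize each neuron
    W = np.zeros((K, K))
    for lag in range(1, int(dtmax) + 1):
        # Correlate presynaptic counts at t with postsynaptic counts at t + lag,
        # averaged over the overlapping window; keep the best lag per pair.
        C = Sc[:-lag].T.dot(Sc[lag:]) / (T - lag)
        W = np.maximum(W, np.abs(C))
    return W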
def run_comparison(data_path, output_path, seed=None, thresh=0.5):
    """
    Run the comparison on the given data file.

    :param data_path:   path to the pickled data
    :param output_path: prefix for the result files
    :param seed:        random seed (drawn at random if None)
    :param thresh:      spike-probability threshold for the oopsi data
    """
    if seed is None:
        seed = np.random.randint(2**32)
    print("Setting seed to ", seed)
    np.random.seed(seed)

    assert os.path.exists(os.path.dirname(output_path)), \
        "Output directory does not exist!"

    if data_path.endswith("_oopsi.pkl.gz"):
        # The oopsi data has a probability of spike
        with gzip.open(data_path, 'rb') as f:
            P, F, Cf, network, pos = pickle.load(f)
        S_full = P > thresh
        # onespk = np.bitwise_and(P > thresh, Cf < 0.3)
        # twospk = np.bitwise_and(P > thresh, Cf >= 0.3)
        # S_full = np.zeros_like(P)
        # S_full[onespk] = 1
        # S_full[twospk] = 2
    elif data_path.endswith(".gz"):
        with gzip.open(data_path, 'rb') as f:
            S_full, F, bins, network, pos = pickle.load(f)
    else:
        with open(data_path, 'rb') as f:
            S_full, F, bins, network, pos = pickle.load(f)

    # Cast to int
    S_full = S_full.astype(int)

    # Train on the first ten minutes and test on the last ten
    # (20ms time bins = 50Hz)
    T_train = 10 * 60 * 50
    T_test = 10 * 60 * 50
    # S = S_full[:-T_test, :]
    S = S_full[:T_train, :]
    S_test = S_full[-T_test:, :]

    K = S.shape[1]
    C = 1
    dt = 0.02
    dt_max = 0.08

    # Compute the cross correlation to estimate the connectivity
    print("Estimating network via cross correlation")
    W_xcorr = infer_net_from_xcorr(S, dtmax=dt_max // dt)

    # HACK: select the threshold by looking at the data
    test_thresholds = False
    if test_thresholds:
        print("Estimating network via cross correlation")
        F_xcorr = infer_net_from_xcorr(F, dtmax=3)
        aucs, _, _ = compute_auc_roc(network, W_xcorr=F_xcorr)
        print("AUC F: ", aucs["xcorr"])

        for thresh in np.linspace(0.1, 0.95, 20):
            S_thr = (P > thresh).astype(int)
            S_train = S_thr[:T_train, :]
            W_tmp = infer_net_from_xcorr(S_train, dtmax=dt_max // dt)
            aucs, _, _ = compute_auc_roc(network, W_xcorr=W_tmp)
            print("AUC (", thresh, "): ", aucs["xcorr"])

        # Drop into the debugger to inspect the sweep before committing
        # to a threshold
        import pdb
        pdb.set_trace()

    # Fit a standard Hawkes model on a subset of the data with BFGS
    bfgs_model, bfgs_time = \
        fit_standard_hawkes_model_bfgs_noxv(S, K, dt, dt_max,
                                            output_path=output_path,
                                            W_max=None)

    # Fit a network Hawkes model with Gibbs
    gibbs_samples = gibbs_timestamps = None
    gibbs_samples, gibbs_timestamps = \
        fit_network_hawkes_gibbs(S, K, C, dt, dt_max,
                                 output_path=output_path,
                                 standard_model=bfgs_model)
    # gibbs_samples, gibbs_timestamps = \
    #     fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max,
    #                                 output_path=output_path,
    #                                 standard_model=bfgs_model)

    # Fit a network Hawkes model with batch VB
    # vb_models, vb_timestamps = fit_network_hawkes_vb(S, K, dt, dt_max,
    #                                                  standard_model=standard_models[-1])
    #
    # with open(output_path + ".vb.pkl", 'wb') as f:
    #     print("Saving VB results to ", (output_path + ".vb.pkl"))
    #     pickle.dump((vb_models, timestamps), f, protocol=-1)

    # Fit a network Hawkes model with SVI
    # svi_models = None
    svi_models, timestamps = fit_network_hawkes_svi(S, K, C, dt, dt_max,
                                                    output_path,
                                                    standard_model=bfgs_model,
                                                    true_network=network)

    # Compute the area under the ROC curve of the inferred network
    auc_rocs, fprs, tprs = compute_auc_roc(network,
                                           W_xcorr=W_xcorr,
                                           bfgs_model=bfgs_model,
                                           gibbs_samples=gibbs_samples,
                                           svi_models=svi_models)
    print("AUC-ROC")
    pprint.pprint(auc_rocs)
    plot_roc_curves(fprs, tprs, fig_path=output_path)

    # Compute the area under the precision-recall curve of the inferred network
    auc_prcs, precs, recalls = compute_auc_prc(network,
                                               W_xcorr=W_xcorr,
                                               bfgs_model=bfgs_model,
                                               gibbs_samples=gibbs_samples,
                                               svi_models=svi_models)
    print("AUC-PRC")
    pprint.pprint(auc_prcs)
    plot_prc_curves(precs, recalls, fig_path=output_path)

    # Compute the predictive log likelihoods
    plls = compute_predictive_ll(S_test, S,
                                 bfgs_model=bfgs_model,
                                 gibbs_samples=gibbs_samples,
                                 svi_models=svi_models)
    print("Log Predictive Likelihoods: ")
    pprint.pprint(plls)
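# -----------------------------------------------------------------------------
# A minimal sketch of the ROC computation that compute_auc_roc presumably
# performs for each model: treat the true network's adjacency as binary
# labels, the inferred weights as scores, and let sklearn do the rest. The
# helper name is hypothetical; only the (auc, fpr, tpr) shape mirrors the
# calls above.
# -----------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

def auc_roc_sketch(A_true, W_inferred):
    """Score a K x K weight matrix against a K x K binary adjacency matrix."""
    labels = (np.asarray(A_true) > 0).ravel().astype(int)
    scores = np.asarray(W_inferred).ravel()
    fpr, tpr, _ = roc_curve(labels, scores)
    return roc_auc_score(labels, scores), fpr, tpr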
def run_comparison(data_path, output_path, seed=None):
    """
    Run the comparison on the given data file.

    :param data_path:   path to the pickled data
    :param output_path: prefix for the result files
    :param seed:        random seed (drawn at random if None)
    """
    if seed is None:
        seed = np.random.randint(2**32)
    print("Setting seed to ", seed)
    np.random.seed(seed)

    assert os.path.exists(os.path.dirname(output_path)), \
        "Output directory does not exist!"

    if data_path.endswith("_oopsi.pkl.gz"):
        # The oopsi data has a probability of spike
        thresh = 0.1
        with gzip.open(data_path, 'rb') as f:
            P, F, Cf, network, pos = pickle.load(f)
        S_full = P > thresh
        # onespk = np.bitwise_and(P > thresh, Cf < 0.3)
        # twospk = np.bitwise_and(P > thresh, Cf >= 0.3)
        # S_full = np.zeros_like(P)
        # S_full[onespk] = 1
        # S_full[twospk] = 2
    elif data_path.endswith(".gz"):
        with gzip.open(data_path, 'rb') as f:
            S_full, F, bins, network, pos = pickle.load(f)
    else:
        with open(data_path, 'rb') as f:
            S_full, F, bins, network, pos = pickle.load(f)

    # Cast to int
    S_full = S_full.astype(int)

    # Train on the first five minutes and test on the last ten
    # (20ms time bins = 50Hz)
    T_train = 5 * 60 * 50
    T_test = 10 * 60 * 50
    # S = S_full[:-T_test, :]
    S = S_full[:T_train, :]
    S_test = S_full[-T_test:, :]

    K = S.shape[1]
    C = 5
    dt = 0.02
    dt_max = 0.08

    # Compute cross correlations of the fluorescence and the spike trains
    # to estimate the connectivity
    print("Estimating network via cross correlation")
    F_xcorr = infer_net_from_xcorr(F[:10000, :], dtmax=3)
    W_xcorr = infer_net_from_xcorr(S[:10000], dtmax=dt_max // dt)

    # Fit a standard Hawkes model on a subset of the data with BFGS
    bfgs_model, bfgs_time = fit_standard_hawkes_model_bfgs(
        S, K, dt, dt_max, output_path=output_path)

    # Fit a standard Hawkes model with SGD
    # standard_models, timestamps = fit_standard_hawkes_model_sgd(
    #     S, K, dt, dt_max, init_model=init_model)
    #
    # # Save the models
    # with open(output_path + ".sgd.pkl", 'wb') as f:
    #     print("Saving SGD results to ", (output_path + ".sgd.pkl"))
    #     pickle.dump((standard_models, timestamps), f, protocol=-1)

    # Fit a network Hawkes model with Gibbs
    gibbs_samples = gibbs_timestamps = None
    gibbs_samples, gibbs_timestamps = fit_network_hawkes_gibbs(
        S, K, C, dt, dt_max,
        output_path=output_path, standard_model=bfgs_model)

    # Fit a network Hawkes model with batch VB
    # vb_models, vb_timestamps = fit_network_hawkes_vb(S, K, dt, dt_max,
    #                                                  standard_model=standard_models[-1])
    #
    # with open(output_path + ".vb.pkl", 'wb') as f:
    #     print("Saving VB results to ", (output_path + ".vb.pkl"))
    #     pickle.dump((vb_models, timestamps), f, protocol=-1)

    # Fit a network Hawkes model with SVI
    svi_models, timestamps = fit_network_hawkes_svi(
        S, K, C, dt, dt_max, output_path, standard_model=bfgs_model)

    # Compute the area under the ROC curve of the inferred network
    auc_rocs, fprs, tprs = compute_auc_roc(network,
                                           W_xcorr=W_xcorr,
                                           bfgs_model=bfgs_model,
                                           gibbs_samples=gibbs_samples,
                                           svi_models=svi_models)
    print("AUC-ROC")
    pprint.pprint(auc_rocs)
    plot_roc_curves(fprs, tprs)

    # Compute the area under the precision-recall curve of the inferred network
    auc_prcs, precs, recalls = compute_auc_prc(network,
                                               W_xcorr=W_xcorr,
                                               bfgs_model=bfgs_model,
                                               gibbs_samples=gibbs_samples,
                                               svi_models=svi_models)
    print("AUC-PRC")
    pprint.pprint(auc_prcs)
    plot_prc_curves(precs, recalls)
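# -----------------------------------------------------------------------------
# A hypothetical driver for the variant above, assuming the script is run
# from the command line; the argument names and help strings are illustrative,
# not from the source.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Compare Hawkes model inference algorithms")
    parser.add_argument("data_path",
                        help="pickled dataset (.pkl, .pkl.gz, or _oopsi.pkl.gz)")
    parser.add_argument("output_path",
                        help="prefix for result files (directory must exist)")
    parser.add_argument("--seed", type=int, default=None,
                        help="random seed; drawn at random if omitted")
    args = parser.parse_args()

    run_comparison(args.data_path, args.output_path, seed=args.seed)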