import copy
import gzip
import os
import pickle
import time
from collections import namedtuple

import numpy as np
import matplotlib.pyplot as plt
from scipy.special import gammaln, logsumexp

# Model classes and helpers are assumed to come from pyhawkes and its
# experiment harness; adjust these import paths to the local project layout.
from pyhawkes.models import (ContinuousTimeNetworkHawkesModel,
                             DiscreteTimeStandardHawkesModel)
from pyhawkes.utils.utils import convert_discrete_to_continuous
from pyhawkes.plotting.plotting import plot_network
from pybasicbayes.util.text import progprint_xrange

# Container for Gibbs sampling results
Results = namedtuple("Results", ["samples", "timestamps", "lps", "hlls"])


def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                model_args=None, standard_model=None,
                                N_samples=100, time_limit=8 * 60 * 60):
    if model_args is None:
        model_args = {}

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from", output_path)
            results = pickle.load(f)
    else:
        print("Fitting the data with a continuous time network Hawkes model "
              "using Gibbs sampling")
        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=25):
            # Update the model
            tic = time.time()
            test_model.resample_model()
            times.append(time.time() - tic)

            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Compute log probability and heldout log likelihood
            lps.append(test_model.log_probability())
            hlls.append(
                test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test))

            # Check if the time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make the results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'wb') as f:
            print("Saving Gibbs samples to", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
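# ---------------------------------------------------------------------------
# Usage sketch for the fitter above, with illustrative (assumed) sizes and
# settings; the real experiments pass in recorded spike-count arrays. It is
# left commented out because the second variant of fit_ct_network_hawkes_gibbs
# below shadows this definition at call time.
#
#   K, dt, dt_max = 4, 1.0, 10.0
#   S = np.random.poisson(0.05, size=(1000, K))
#   S_test = np.random.poisson(0.05, size=(200, K))
#   results = fit_ct_network_hawkes_gibbs(
#       S, S_test, dt, dt_max, output_path="ct_gibbs_results.pkl.gz",
#       N_samples=50, time_limit=60 * 60)
#   print("Final heldout log likelihood:", results.hlls[-1])
# ---------------------------------------------------------------------------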
def compute_predictive_ll(S_test, S_train, true_model=None, bfgs_model=None,
                          sgd_models=None, gibbs_samples=None,
                          vb_models=None, svi_models=None):
    """
    Compute the predictive log likelihood of the held-out data, S_test,
    under each of the fitted models, along with a homogeneous Poisson
    baseline estimated from S_train.

    :return: a dict mapping model name to predictive log likelihood
    """
    plls = {}

    # Compute the homogeneous Poisson process baseline
    T = S_train.shape[0]
    T_test = S_test.shape[0]
    lam_homog = S_train.sum(axis=0) / float(T)
    plls['homog'] = 0
    plls['homog'] += -gammaln(S_test + 1).sum()
    plls['homog'] += (-lam_homog * T_test).sum()
    plls['homog'] += (S_test.sum(axis=0) * np.log(lam_homog)).sum()

    if true_model is not None:
        plls['true'] = true_model.heldout_log_likelihood(S_test)

    if bfgs_model is not None:
        assert isinstance(bfgs_model, DiscreteTimeStandardHawkesModel)
        plls['bfgs'] = bfgs_model.heldout_log_likelihood(S_test)

    if sgd_models is not None:
        assert isinstance(sgd_models, list)
        plls['sgd'] = np.zeros(len(sgd_models))
        for i, sgd_model in enumerate(sgd_models):
            plls['sgd'][i] = sgd_model.heldout_log_likelihood(S_test)

    if gibbs_samples is not None:
        print("Computing pred ll for Gibbs")

        # Compute log(E[pred likelihood]) on the second half of the samples
        offset = len(gibbs_samples) // 2

        # Convert the test data to continuous time (bin width 0.02 assumed)
        S_ct, C_ct, T = convert_discrete_to_continuous(S_test, 0.02)

        plls['gibbs'] = []
        for s in gibbs_samples[offset:]:
            plls['gibbs'].append(s.heldout_log_likelihood(S_ct, C_ct, T))

        # Convert to a numpy array
        plls['gibbs'] = np.array(plls['gibbs'])

    if vb_models is not None:
        print("Computing pred ll for VB")

        # Compute the predictive likelihood over samples from the VB models
        N_models = len(vb_models)
        N_samples = 100

        # Preconvolve the test data with the VB model's basis
        F_test = vb_models[0].basis.convolve_with_basis(S_test)

        vb_plls = np.zeros((N_models, N_samples))
        for i, vb_model in enumerate(vb_models):
            for j in range(N_samples):
                vb_model.resample_from_mf()
                vb_plls[i, j] = vb_model.heldout_log_likelihood(S_test, F=F_test)

        # Compute the log of the average predictive likelihood
        plls['vb'] = -np.log(N_samples) + logsumexp(vb_plls, axis=1)

    if svi_models is not None:
        print("Computing predictive likelihood for SVI models")

        # Compute the predictive likelihood over samples from the SVI models
        N_models = len(svi_models)
        N_samples = 1

        # Preconvolve the test data with the SVI model's basis
        F_test = svi_models[0].basis.convolve_with_basis(S_test)

        svi_plls = np.zeros((N_models, N_samples))
        for i, svi_model in enumerate(svi_models):
            for j in range(N_samples):
                svi_model.resample_from_mf()
                svi_plls[i, j] = svi_model.heldout_log_likelihood(S_test, F=F_test)

        plls['svi'] = -np.log(N_samples) + logsumexp(svi_plls, axis=1)

    return plls
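# ---------------------------------------------------------------------------
# Usage sketch (assumed shapes): with no fitted models supplied, only the
# homogeneous Poisson baseline is scored, so this needs nothing beyond
# numpy/scipy.
#
#   S_train = np.random.poisson(0.05, size=(1000, 4))
#   S_test = np.random.poisson(0.05, size=(200, 4))
#   plls = compute_predictive_ll(S_test, S_train)
#   print("Homogeneous pred. LL:", plls['homog'])
# ---------------------------------------------------------------------------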
# Alternative variant of fit_ct_network_hawkes_gibbs from an earlier
# experiment script. It takes precomputed K and C, fixes the network
# hyperparameters, and plots the weights as it samples. Note that it
# shadows the definition above if both are kept in the same module.
def fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path,
                                standard_model=None):
    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", 'rb') as f:
            print("Loading Gibbs results from", output_path + ".gibbs.pkl")
            samples, timestamps = pickle.load(f)
    else:
        print("Fitting the data with a network Hawkes model using Gibbs sampling")
        S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

        # Set the network prior such that E[W] ~= 0.2.
        # W ~ Gamma(kappa, v) has mean kappa / v, so with kappa = 10 we
        # take v = kappa / E_W = 50.
        E_W = 0.2
        kappa = 10.
        E_v = kappa / E_W
        network_hypers = {'C': 1,
                          'c': np.zeros(K, dtype=int),
                          'p': 0.25,
                          'v': E_v,
                          'allow_self_connections': False}
        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max,
                                             network_hypers=network_hypers)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Plot the network weights and update the figure as we sample
        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.025)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in range(N_samples):
            print("Iteration", itr, "\tLL:", lps[-1])
            im.set_data(test_model.weight_model.W_effective)
            plt.pause(0.001)

            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())
            print(test_model.network.p)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'wb') as f:
                pickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'wb') as f:
            print("Saving Gibbs samples to", output_path + ".gibbs.pkl")
            pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
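# ---------------------------------------------------------------------------
# Usage sketch for the variant above (assumed values). K is the number of
# processes and C the number of latent clusters, though this variant
# hard-codes C = 1 in its network_hypers regardless of the C argument.
#
#   K, C, dt, dt_max = 4, 1, 1.0, 10.0
#   S = np.random.poisson(0.05, size=(1000, K))
#   samples, timestamps = fit_ct_network_hawkes_gibbs(
#       S, K, C, dt, dt_max, output_path="results/ct_hawkes")
# ---------------------------------------------------------------------------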