dt_max = 3.0 network_hypers = {'c': np.array([0], dtype=np.int), 'p': 0.5, 'kappa': 3.0, 'v': 15.0} weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0} model = DiscreteTimeNetworkHawkesModelGammaMixture(K=1, dt=dt, dt_max=dt_max, weight_hypers=weight_hypers, network_hypers=network_hypers) model.generate(T=T) # Gibbs sample and then generate new data N_samples = 10000 samples = [] lps = [] for itr in progprint_xrange(N_samples, perline=50): # Resample the model model.resample_model(resample_network=False) samples.append(model.copy_sample()) lps.append(model.log_probability()) # Geweke step model.data_list.pop() model.generate(T=T) # Compute sample statistics for second half of samples A_samples = np.array([s.weight_model.A for s in samples]) W_samples = np.array([s.weight_model.W for s in samples]) g_samples = np.array([s.impulse_model.g for s in samples]) lambda0_samples = np.array([s.bias_model.lambda0 for s in samples]) c_samples = np.array([s.network.c for s in samples]) p_samples = np.array([s.network.p for s in samples])
def demo(seed=None):
    """
    Fit a weak spike-and-slab ("weakly sparse") network Hawkes model to a
    synthetic dataset with Gibbs sampling, then plot diagnostics.

    The function loads a pickled ``(S, true_model)`` pair, optionally fits a
    standard Hawkes model with BFGS to initialize the sampler, runs 500 Gibbs
    iterations on a ``DiscreteTimeNetworkHawkesModelGammaMixture`` and plots
    the log-probability trace and the link-prediction AUC curves.

    :param seed: value passed to ``np.random.seed``. When None, a random seed
                 is drawn and printed so that the run can be reproduced.
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)

    # Single-argument print(...) yields the same output on Python 2 and 3.
    print("%s %s" % ("Setting seed to ", seed))
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at trusted files.
    with gzip.open(data_path, 'r') as f:
        S, true_model = cPickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print("%s %s %s" % ("Initializing with BFGS on first ",
                            init_len, " time bins."))
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt,
                                                     dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])
        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################
    # Copy the true network hypers, but withhold 'v' from the test model
    # by setting it to None (every other entry is passed through as-is).
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K, dt=dt, dt_max=dt_max, B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    for itr in xrange(N_samples):
        # Record the current state *before* resampling, so the first entry
        # is the initialization itself.
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())

        print("")
        print("%s %s" % ("Gibbs iteration ", itr))
        print("%s %s" % ("LP: ", lps[-1]))

        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    lps = np.array(lps)

    # Average over the second half of the chain only.
    offset = N_samples // 2
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)

    plt.figure()
    plt.plot(np.arange(N_samples), lps, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")
    plt.show()

    # Compute the link prediction accuracy curves
    true_A = true_model.weight_model.A.ravel()

    # The MAP baseline only exists when the initialization step was run.
    auc_init = None
    if init_model is not None:
        auc_init = roc_auc_score(true_A, init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_A, A_mean.ravel())
    auc_W_mean = roc_auc_score(true_A, W_mean.ravel())

    aucs = [roc_auc_score(true_A, A.ravel()) for A in A_samples]

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    if auc_init is not None:
        plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()

    # Leave interactive mode (if it was on) and show once more.
    plt.ioff()
    plt.show()
test_model.add_data(S) # Initialize with the standard model parameters if init_model is not None: test_model.initialize_with_standard_model(init_model) ########################################################### # Fit the test model with Gibbs sampling ########################################################### N_samples = 500 samples = [] lps = [] for itr in progprint_xrange(N_samples): lps.append(test_model.log_probability()) samples.append(test_model.copy_sample()) test_model.resample_model() ########################################################### # Analyze the samples ########################################################### N_samples = len(samples) A_samples = np.array([s.weight_model.A for s in samples]) W_samples = np.array([s.weight_model.W for s in samples]) g_samples = np.array([s.impulse_model.g for s in samples]) lambda0_samples = np.array([s.bias_model.lambda0 for s in samples]) lps = np.array(lps) offset = N_samples // 2 A_mean = A_samples[offset:, ...].mean(axis=0) W_mean = W_samples[offset:, ...].mean(axis=0) g_mean = g_samples[offset:, ...].mean(axis=0)
def demo(seed=None):
    """
    Fit a weak spike-and-slab ("weakly sparse") network Hawkes model to a
    synthetic dataset with Gibbs sampling, then plot diagnostics.

    The function loads a pickled ``(S, true_model)`` pair, optionally fits a
    standard Hawkes model with BFGS to initialize the sampler, runs 500 Gibbs
    iterations on a ``DiscreteTimeNetworkHawkesModelGammaMixture`` and plots
    the log-probability trace and the link-prediction AUC curves.

    :param seed: value passed to ``np.random.seed``. When None, a random seed
                 is drawn and printed so that the run can be reproduced.
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)

    # Single-argument print(...) yields the same output on Python 2 and 3.
    print("%s %s" % ("Setting seed to ", seed))
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at trusted files.
    with gzip.open(data_path, 'r') as f:
        S, true_model = cPickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print("%s %s %s" % ("Initializing with BFGS on first ",
                            init_len, " time bins."))
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt,
                                                     dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])
        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################
    # Copy the true network hypers, but withhold 'v' from the test model
    # by setting it to None (every other entry is passed through as-is).
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K, dt=dt, dt_max=dt_max, B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    for itr in xrange(N_samples):
        # Record the current state *before* resampling, so the first entry
        # is the initialization itself.
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())

        print("")
        print("%s %s" % ("Gibbs iteration ", itr))
        print("%s %s" % ("LP: ", lps[-1]))

        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    lps = np.array(lps)

    # Average over the second half of the chain only.
    offset = N_samples // 2
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)

    plt.figure()
    plt.plot(np.arange(N_samples), lps, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")
    plt.show()

    # Compute the link prediction accuracy curves
    true_A = true_model.weight_model.A.ravel()

    # The MAP baseline only exists when the initialization step was run.
    auc_init = None
    if init_model is not None:
        auc_init = roc_auc_score(true_A, init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_A, A_mean.ravel())
    auc_W_mean = roc_auc_score(true_A, W_mean.ravel())

    aucs = [roc_auc_score(true_A, A.ravel()) for A in A_samples]

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    if auc_init is not None:
        plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()

    # Leave interactive mode (if it was on) and show once more.
    plt.ioff()
    plt.show()
weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0} model = DiscreteTimeNetworkHawkesModelGammaMixture( K=1, dt=dt, dt_max=dt_max, weight_hypers=weight_hypers, network_hypers=network_hypers) model.generate(T=T) # Gibbs sample and then generate new data N_samples = 10000 samples = [] lps = [] for itr in progprint_xrange(N_samples, perline=50): # Resample the model model.resample_model(resample_network=False) samples.append(model.copy_sample()) lps.append(model.log_probability()) # Geweke step model.data_list.pop() model.generate(T=T) # Compute sample statistics for second half of samples A_samples = np.array([s.weight_model.A for s in samples]) W_samples = np.array([s.weight_model.W for s in samples]) g_samples = np.array([s.impulse_model.g for s in samples]) lambda0_samples = np.array([s.bias_model.lambda0 for s in samples]) c_samples = np.array([s.network.c for s in samples]) p_samples = np.array([s.network.p for s in samples]) v_samples = np.array([s.network.v for s in samples])
def geweke_test():
    """
    Run a Geweke-style check of the Gibbs sampler on a one-process model.

    A ``DiscreteTimeNetworkHawkesModelGammaMixture`` with K=1 generates a
    dataset of T=50 bins. Each of the 10000 iterations then resamples the
    model given the data (network resampling disabled), records the state,
    discards the dataset and generates a new one from the model. Finally the
    sampled lambda0, W (split by A=1 / A=0) and g values are histogrammed
    against gamma/beta densities built from the model's hyperparameters
    (presumably the priors the samples should follow if the sampler is
    correct).

    :return: None
    """
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {
        # builtin int is what the former np.int alias referred to
        'c': np.array([0], dtype=int),
        'p': 0.5,
        'kappa': 3.0,
        'v': 15.0
    }
    model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=1, dt=dt, dt_max=dt_max, network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in xrange(N_samples):
        if itr % 10 == 0:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%s %s" % ("Iteration: ", itr))

        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step: drop the last dataset and generate a replacement
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics. offset = 0 keeps every sample, i.e. no
    # initial portion of the chain is discarded.
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print("%s %s" % ("A mean: ", A_mean))
    print("%s %s" % ("W mean: ", W_mean))
    print("%s %s" % ("g mean: ", g_mean))
    print("%s %s" % ("lambda0 mean: ", lambda0_mean))

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha,
                     scale=1. / model.bias_model.beta)
    _plot_hist_vs_pdf(lambda0_samples[:, 0], p_lmbda0, 'lam0', 'p(lam0)')

    print("%s %s" % ("Expected p(A): ", model.network.P))
    print("%s %s" % ("Empirical p(A): ", A_samples.mean(axis=0)))

    # Plot the histogram of weight samples, conditioned on A = 1 ...
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1. / model.network.v[0, 0])
    _plot_hist_vs_pdf(W_samples[Aeq1, 0, 0], p_W1, 'W', 'p(W | A=1)')

    # ... and on A = 0, where the weight model's kappa_0 / nu_0 are used.
    plt.figure()
    Aeq0 = A_samples[:, 0, 0] == 0
    p_W0 = gamma(model.weight_model.kappa_0,
                 scale=1. / model.weight_model.nu_0)
    _plot_hist_vs_pdf(W_samples[Aeq0, 0, 0], p_W0, 'W', 'p(W | A=0)')

    # Plot the histogram of impulse samples, one subplot per basis index
    plt.figure()
    impulse_gamma = model.impulse_model.gamma
    for b in range(model.B):
        plt.subplot(1, model.B, b + 1)
        # Keep the Beta parameters in their own names: reusing `b` for the
        # second parameter would clobber the basis index used below.
        beta_a = impulse_gamma[b]
        beta_b = impulse_gamma.sum() - beta_a
        p_beta11b = beta(beta_a, beta_b)
        _plot_hist_vs_pdf(g_samples[:, 0, 0, b], p_beta11b,
                          'g_%d' % b, 'p(g_%d)' % b)

    plt.show()


def _plot_hist_vs_pdf(values, dist, xlabel, ylabel):
    """
    Draw a normalized 20-bin histogram of ``values`` on the current axes and
    overlay ``dist.pdf`` evaluated at the bin centers as a dashed red line.

    :param values: 1-D array of samples to histogram.
    :param dist:   object exposing ``pdf(x)`` (e.g. a frozen scipy.stats
                   distribution).
    :param xlabel: x-axis label.
    :param ylabel: y-axis label.
    :return: None
    """
    # NOTE(review): `normed` was removed in matplotlib 3.1 in favour of
    # `density`; switch once older matplotlib no longer needs supporting.
    _, bins, _ = plt.hist(values, bins=20, alpha=0.5, normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, dist.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
# Initialize with the standard model parameters if init_model is not None: test_model.initialize_with_standard_model(init_model) ########################################################### # Fit the test model with Gibbs sampling ########################################################### N_samples = 500 samples = [] lps = [] for itr in progprint_xrange(N_samples): lps.append(test_model.log_probability()) samples.append(test_model.copy_sample()) test_model.resample_model() ########################################################### # Analyze the samples ########################################################### N_samples = len(samples) A_samples = np.array([s.weight_model.A for s in samples]) W_samples = np.array([s.weight_model.W for s in samples]) g_samples = np.array([s.impulse_model.g for s in samples]) lambda0_samples = np.array([s.bias_model.lambda0 for s in samples]) lps = np.array(lps) offset = N_samples // 2 A_mean = A_samples[offset:, ...].mean(axis=0) W_mean = W_samples[offset:, ...].mean(axis=0) g_mean = g_samples[offset:, ...].mean(axis=0)
def demo(seed=None):
    """
    Fit a weak spike-and-slab ("weakly sparse") network Hawkes model to a
    synthetic dataset with Gibbs sampling, updating plots as it runs.

    :param seed: value passed to ``np.random.seed``. When None, a random seed
                 is drawn and printed first.
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)
    print("%s %s" % ("Setting seed to ", seed))
    np.random.seed(seed)

    # ---- Load some example data -------------------------------------
    # See data/synthetic/generate.py to create more.
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as fin:
        S, true_model = cPickle.load(fin)

    T = S.shape[0]
    K, B = true_model.K, true_model.B
    dt, dt_max = true_model.dt, true_model.dt_max

    # ---- Initialize with MAP estimation on a standard Hawkes model ---
    init_with_map = True
    init_model = None
    if init_with_map:
        init_len = T
        print("%s %s %s" % ("Initializing with BFGS on first ",
                            init_len, " time bins."))
        init_model = DiscreteTimeStandardHawkesModel(
            K=K, dt=dt, dt_max=dt_max, B=B, alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])
        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()

    # ---- Create a test weak spike-and-slab model ---------------------
    # Copy the network hypers and give the test model p, but not c, v, or m.
    network_hypers = true_model.network_hypers.copy()
    for withheld in ('c', 'v', 'm'):
        network_hypers[withheld] = None

    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)

    # Seed the sampler with the standard model's parameters when available.
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    # ---- Fit the test model with Gibbs sampling ----------------------
    N_samples = 500
    plot_interval = 1  # refresh the plots every `plot_interval` iterations
    samples, lps = [], []
    for itr in xrange(N_samples):
        # Record the state before resampling it.
        current_lp = test_model.log_probability()
        lps.append(current_lp)
        samples.append(test_model.copy_sample())

        print("")
        print("%s %s" % ("Gibbs iteration ", itr))
        print("%s %s" % ("LP: ", current_lp))

        test_model.resample_model()

        # Update plot
        if itr % plot_interval == 0:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    # ---- Analyze the samples -----------------------------------------
    analyze_samples(true_model, init_model, samples, lps)