def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                model_args=None, standard_model=None,
                                N_samples=100, time_limit=8 * 60 * 60):
    """
    Fit a continuous-time network Hawkes model to discrete-time spike data
    via Gibbs sampling, caching the fitted results on disk.

    :param S:              (T, K) discrete-time training spike-count array.
    :param S_test:         discrete-time held-out spike-count array.
    :param dt:             bin width used to convert discrete to continuous time.
    :param dt_max:         maximum impulse-response support for the model.
    :param output_path:    gzipped pickle file to load results from / save to.
    :param model_args:     extra kwargs for ContinuousTimeNetworkHawkesModel
                           (default: empty dict).
    :param standard_model: optional standard Hawkes model whose parameters
                           initialize the sampler.
    :param N_samples:      maximum number of Gibbs iterations.
    :param time_limit:     wall-clock budget in seconds (default 8 hours).
    :return: Results(samples, timestamps, lps, hlls)
    """
    # Use a None sentinel instead of a shared mutable default argument.
    if model_args is None:
        model_args = {}

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        # Pickle data is binary; make the gzip mode explicit.
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from ", output_path)
            results = pickle.load(f)
    else:
        print(
            "Fitting the data with a continuous time network Hawkes model using Gibbs sampling"
        )
        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [
            test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)
        ]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=25):
            # Update the model, timing only the resampling step.
            tic = time.time()
            test_model.resample_model()
            times.append(time.time() - tic)

            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Only the heldout log likelihood is tracked per iteration;
            # log_probability is deliberately computed once at initialization.
            hlls.append(
                test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test))

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'wb') as f:
            print("Saving Gibbs samples to ", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                model_args=None, standard_model=None,
                                N_samples=100, time_limit=8 * 60 * 60):
    """
    Fit a continuous-time network Hawkes model with Gibbs sampling,
    loading cached results from disk when available.

    This is the Python-3 form of the routine: the Python-2-only
    ``print`` statements and ``cPickle`` module have been replaced with
    ``print()`` calls and the standard ``pickle`` module.

    :param S:              (T, K) discrete-time training spike-count array.
    :param S_test:         discrete-time held-out spike-count array.
    :param dt:             bin width used to convert discrete to continuous time.
    :param dt_max:         maximum impulse-response support for the model.
    :param output_path:    gzipped pickle file to load results from / save to.
    :param model_args:     extra kwargs for ContinuousTimeNetworkHawkesModel
                           (default: empty dict).
    :param standard_model: optional standard Hawkes model whose parameters
                           initialize the sampler.
    :param N_samples:      maximum number of Gibbs iterations.
    :param time_limit:     wall-clock budget in seconds (default 8 hours).
    :return: Results(samples, timestamps, lps, hlls)
    """
    # cPickle does not exist on Python 3; import the stdlib pickle locally
    # so this block does not depend on the file's top-level imports.
    import pickle

    # None sentinel avoids a shared mutable default argument.
    if model_args is None:
        model_args = {}

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        # Pickle data is binary; make the gzip mode explicit.
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from ", output_path)
            results = pickle.load(f)
    else:
        print("Fitting the data with a continuous time network Hawkes model using Gibbs sampling")
        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=25):
            # Update the model, timing only the resampling step.
            tic = time.time()
            test_model.resample_model()
            times.append(time.time() - tic)

            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Only the heldout log likelihood is tracked per iteration;
            # log_probability is deliberately computed once at initialization.
            hlls.append(test_model.heldout_log_likelihood(S_test_ct,
                                                          C_test_ct, T_test))

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'wb') as f:
            print("Saving Gibbs samples to ", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
# Fit (or reload) one model per candidate network on the hippocampus data.
# NOTE(review): relies on globals defined earlier in the file (networks,
# names, results, K, S_train/C_train/T_train, S_test/C_test, T, N_samples).
results_dir = os.path.join("results", "hippocampus", "run002")
for network, name in zip(networks, names):
    results_file = os.path.join(results_dir, "%s.pkl" % name)
    if os.path.exists(results_file):
        # Pickle files are binary: text mode ("r") raises
        # UnicodeDecodeError on Python 3, so open with "rb".
        with open(results_file, "rb") as f:
            result = pickle.load(f)
        results.append(result)
        continue

    print("Fitting model with ", name, " network.")
    model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=1., network=network)
    model.add_data(S_train, C_train, T_train)
    model.resample_model()

    # Add the test data and then remove it. That way we can
    # efficiently compute its predictive log likelihood
    model.add_data(S_test, C_test, T - T_train)
    test_data = model.data_list.pop()

    ### Fit the model
    lls = [model.log_likelihood()]
    plls = [model.log_likelihood(test_data)]
    Weffs = []
    Ps = []
    Ls = []

    # "itr" instead of "iter" so the builtin is not shadowed.
    for itr in progprint_xrange(N_samples, perline=25):
        model.resample_model()
def test_geweke():
    """
    Geweke test for the continuous-time network Hawkes Gibbs sampler.

    Alternates resampling the model parameters and regenerating data from
    the model, so the resulting parameter samples should be distributed
    according to the prior; the plots compare empirical histograms against
    the analytic prior densities.
    """
    K = 1
    T = 50.0
    dt_max = 3.0
    # network_hypers = {'C': 1, 'p': 0.5, 'kappa': 3.0, 'alpha': 3.0, 'beta': 1.0/20.0}
    # np.int was removed from NumPy; the builtin int is the equivalent dtype.
    network_hypers = {'c': np.zeros(K, dtype=int), 'p': 0.5,
                      'kappa': 10.0, 'v': 10 * 3.0}
    model = ContinuousTimeNetworkHawkesModel(K=K, dt_max=dt_max,
                                             network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 1000
    samples = []
    lps = []
    for _ in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step: discard the data and regenerate from the model
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    mu_samples = np.array([s.impulse_model.mu for s in samples])
    tau_samples = np.array([s.impulse_model.tau for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    mu_mean = mu_samples[offset:, ...].mean(axis=0)
    tau_mean = tau_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print("A mean: ", A_mean)
    print("W mean: ", W_mean)
    print("mu mean: ", mu_mean)
    print("tau mean: ", tau_mean)
    print("lambda0 mean: ", lambda0_mean)

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples.
    # hist(..., density=True) replaces the removed normed=True keyword.
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1. / model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0], bins=50, alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('lam0')
    plt.ylabel('p(lam0)')

    print("Expected p(A): ", model.network.P)
    print("Empirical p(A): ", A_samples.mean(axis=0))

    # Plot the histogram of weight samples, conditioned on A = 1.
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1. / model.network.v[0, 0])
    if np.sum(Aeq1) > 0:
        _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0], bins=50, alpha=0.5,
                              density=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
        plt.xlabel('W')
        plt.ylabel('p(W | A=1)')

    # Plot the histogram of impulse precisions
    plt.figure()
    p_tau = gamma(model.impulse_model.alpha_0,
                  scale=1. / model.impulse_model.beta_0)
    _, bins, _ = plt.hist(tau_samples[:, 0, 0], bins=50, alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_tau.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('tau')
    plt.ylabel('p(tau)')

    # Plot the histogram of impulse means; the marginal prior on mu is a
    # Student-t from the normal-gamma conjugate prior.
    plt.figure()
    p_mu = t(df=2 * model.impulse_model.alpha_0,
             loc=model.impulse_model.mu_0,
             scale=np.sqrt(model.impulse_model.beta_0 /
                           (model.impulse_model.alpha_0 *
                            model.impulse_model.lmbda_0)))
    _, bins, _ = plt.hist(mu_samples[:, 0, 0], bins=50, alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_mu.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('mu')
    plt.ylabel('p(mu)')

    plt.show()
# Fit (or reload) one model per candidate network on the hippocampus data.
# Converted from Python-2-only syntax: print statements -> print() calls,
# cPickle -> pickle (cPickle does not exist on Python 3).
# NOTE(review): relies on globals defined earlier in the file (networks,
# names, results, K, S_train/C_train/T_train, S_test/C_test, T, N_samples).
import pickle

results_dir = os.path.join("results", "hippocampus", "run002")
for network, name in zip(networks, names):
    results_file = os.path.join(results_dir, "%s.pkl" % name)
    if os.path.exists(results_file):
        # Pickle files are binary: text mode ("r") raises
        # UnicodeDecodeError on Python 3, so open with "rb".
        with open(results_file, "rb") as f:
            result = pickle.load(f)
        results.append(result)
        continue

    print("Fitting model with ", name, " network.")
    model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=1., network=network)
    model.add_data(S_train, C_train, T_train)
    model.resample_model()

    # Add the test data and then remove it. That way we can
    # efficiently compute its predictive log likelihood
    model.add_data(S_test, C_test, T - T_train)
    test_data = model.data_list.pop()

    ### Fit the model
    lls = [model.log_likelihood()]
    plls = [model.log_likelihood(test_data)]
    Weffs = []
    Ps = []
    Ls = []

    # "itr" instead of "iter" so the builtin is not shadowed.
    for itr in progprint_xrange(N_samples, perline=25):
        model.resample_model()
def test_geweke():
    """
    Geweke test for the continuous-time network Hawkes Gibbs sampler.

    Alternates resampling the model parameters and regenerating data from
    the model, so the resulting parameter samples should follow the prior;
    the plots compare empirical histograms against the analytic densities.
    Converted from Python-2-only print statements to print() calls.
    """
    K = 1
    T = 50.0
    dt_max = 3.0
    # network_hypers = {'C': 1, 'p': 0.5, 'kappa': 3.0, 'alpha': 3.0, 'beta': 1.0/20.0}
    # np.int was removed from NumPy; the builtin int is the equivalent dtype.
    network_hypers = {"c": np.zeros(K, dtype=int), "p": 0.5,
                      "kappa": 10.0, "v": 10 * 3.0}
    model = ContinuousTimeNetworkHawkesModel(K=K, dt_max=dt_max,
                                             network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 1000
    samples = []
    lps = []
    for _ in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step: discard the data and regenerate from the model
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    mu_samples = np.array([s.impulse_model.mu for s in samples])
    tau_samples = np.array([s.impulse_model.tau for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    mu_mean = mu_samples[offset:, ...].mean(axis=0)
    tau_mean = tau_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print("A mean: ", A_mean)
    print("W mean: ", W_mean)
    print("mu mean: ", mu_mean)
    print("tau mean: ", tau_mean)
    print("lambda0 mean: ", lambda0_mean)

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples.
    # hist(..., density=True) replaces the removed normed=True keyword.
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1.0 / model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0], bins=50, alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel("lam0")
    plt.ylabel("p(lam0)")

    print("Expected p(A): ", model.network.P)
    print("Empirical p(A): ", A_samples.mean(axis=0))

    # Plot the histogram of weight samples, conditioned on A = 1.
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1.0 / model.network.v[0, 0])
    if np.sum(Aeq1) > 0:
        _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0], bins=50, alpha=0.5,
                              density=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, p_W1.pdf(bincenters), "r--", linewidth=1)
        plt.xlabel("W")
        plt.ylabel("p(W | A=1)")

    # Plot the histogram of impulse precisions
    plt.figure()
    p_tau = gamma(model.impulse_model.alpha_0,
                  scale=1.0 / model.impulse_model.beta_0)
    _, bins, _ = plt.hist(tau_samples[:, 0, 0], bins=50, alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_tau.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel("tau")
    plt.ylabel("p(tau)")

    # Plot the histogram of impulse means; the marginal prior on mu is a
    # Student-t from the normal-gamma conjugate prior.
    plt.figure()
    p_mu = t(
        df=2 * model.impulse_model.alpha_0,
        loc=model.impulse_model.mu_0,
        scale=np.sqrt(model.impulse_model.beta_0 /
                      (model.impulse_model.alpha_0 *
                       model.impulse_model.lmbda_0)),
    )
    _, bins, _ = plt.hist(mu_samples[:, 0, 0], bins=50, alpha=0.5,
                          density=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_mu.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel("mu")
    plt.ylabel("p(mu)")

    plt.show()