def execute_toy(self, mode="discrete", dt_max=3, N_samples=1000, network_priors=None):
    """Fit a standard discrete-time Hawkes model to ``self.data`` and export JSON.

    A ``DiscreteTimeStandardHawkesModel`` is always fit with BFGS; ``mode``
    only selects which auxiliary network model is constructed beforehand
    (that auxiliary model does not feed into the returned fit).  For each of
    the first three processes, the convolved-basis response curve and the
    inferred rate (split into "background" and "influence") are written as
    JSON files into the hard-coded web-app data directory.

    :param mode: ``'discrete'`` builds a spike-and-slab network model,
        ``'continuous'`` builds a continuous-time network model; any other
        value builds no auxiliary model.
    :param dt_max: maximum lag handed to the models.  The exported response
        curves have ``2 * dt_max`` rows and are zeroed from row ``dt_max`` on.
    :param N_samples: unused (the Gibbs loop it controlled is disabled);
        kept so existing callers keep working.
    :param network_priors: network hyperparameters for the auxiliary model.
        Defaults to ``{"p": 1.0, "allow_self_connections": False}``.
    :return: ``(test_model.W, inferred_rate, responses)`` where
        ``inferred_rate`` and ``responses`` are dicts keyed by ``"0"``,
        ``"1"``, ``"2"`` holding exactly what was written to the JSON files.
    """
    # Build the default per call instead of sharing one mutable dict object.
    if network_priors is None:
        network_priors = {"p": 1.0, "allow_self_connections": False}

    # NOTE(review): output location and the process count (3) are hard-coded,
    # as in the original; confirm self.K == 3 for every caller.
    out_dir = '/Users/PauKung/hawkes_demo/webapp/static/data/'
    n_out = 3

    if mode == 'discrete':
        aux_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=self.K, dt_max=dt_max, network_hypers=network_priors)
        aux_model.add_data(self.data)
        aux_model.initialize_with_standard_model(None)
    elif mode == 'continuous':
        # The original passed the undefined name `network_hypers` here,
        # which raised NameError; the caller-supplied priors are intended.
        aux_model = ContinuousTimeNetworkHawkesModel(
            self.K, dt_max=dt_max, network_hypers=network_priors)
        aux_model.add_data(self.data, self.labels)

    ###########################################################
    # Fit the test model (BFGS on the standard model)
    ###########################################################
    test_model = DiscreteTimeStandardHawkesModel(
        K=self.K, dt_max=dt_max, allow_self_connections=False)
    test_model.add_data(self.data)
    test_model.fit_with_bfgs()

    # The original also read `test_model1.impulse_model.impulses` at this
    # point; the value was overwritten immediately and the name does not
    # exist outside 'discrete' mode, so the read has been dropped.

    # Convolve a block of ones with the basis and sum over the last axis to
    # get one response curve per process; zero everything from dt_max on.
    rr = test_model.basis.convolve_with_basis(np.ones((dt_max * 2, self.K)))
    impulse = np.sum(rr, axis=2)
    impulse[dt_max:, :] = 0

    responses = {}
    for i in range(n_out):
        responses[str(i)] = {
            "key": "response: process " + str(i),
            "values": [{"x": idx, "y": k} for idx, k in enumerate(impulse[:, i])],
        }
        with open(out_dir + 'response' + str(i) + '.json', 'w') as outfile:
            json.dump({"out": responses[str(i)]}, outfile)

    rates = test_model.compute_rate()

    _, F = test_model.data_list[0]
    print(F)

    inferred_rate = {}
    for i in range(n_out):
        bias_i = test_model.bias[i]
        inferred_rate[str(i)] = [
            # Constant background rate over the whole time axis.
            {"key": "background",
             "values": [[j, bias_i] for j in range(self.T)]},
            # Total rate minus the background.
            {"key": "influence: process" + str(i),
             "values": [[idx, j - bias_i] for idx, j in enumerate(rates[:, i])]},
        ]
        with open(out_dir + 'infer' + str(i) + '.json', 'w') as outfile:
            json.dump({"out": inferred_rate[str(i)]}, outfile)

    print(test_model.W)
    return test_model.W, inferred_rate, responses
def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max,
                                output_path,
                                model_args=None, standard_model=None,
                                N_samples=100, time_limit=8 * 60 * 60):
    """
    Fit a continuous-time network Hawkes model by Gibbs sampling, caching the
    outcome as a gzipped pickle at ``output_path``.

    If ``output_path`` already exists its contents are unpickled and returned
    without touching the data.  Otherwise the model is built, sampled for up
    to ``N_samples`` sweeps (stopping early once the accumulated sweep time
    exceeds ``time_limit``), and the results are written to ``output_path``.

    :param S: training data; ``S.shape[1]`` is used as the number of
        processes and ``S`` is handed to ``convert_discrete_to_continuous``.
    :param S_test: held-out data, converted the same way and used for the
        held-out log likelihood after every sweep.
    :param dt: second argument of ``convert_discrete_to_continuous``.
    :param dt_max: ``dt_max`` keyword of the model constructor.
    :param output_path: path of the gzipped pickle cache.
    :param model_args: extra keyword arguments for the model constructor
        (default: none).
    :param standard_model: optional model passed to
        ``initialize_with_standard_model``.
    :param N_samples: maximum number of Gibbs sweeps.
    :param time_limit: budget in seconds for the summed sweep times.
    :return: the cached object, or a new ``Results(samples, timestamps, lps,
        hlls)``.  ``lps`` holds only the initial log probability because the
        per-sweep update is disabled.
    """
    # Avoid a shared mutable default argument.
    if model_args is None:
        model_args = {}

    # Check for existing Gibbs results before doing any conversion work.
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print("Loading Gibbs results from ", output_path)
            # NOTE: unpickling executes arbitrary code; only load trusted files.
            return pickle.load(f)

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    print(
        "Fitting the data with a continuous time network Hawkes model using Gibbs sampling"
    )
    test_model = ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Gibbs sample
    samples = []
    lps = [test_model.log_probability()]
    hlls = [test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)]
    times = [0]
    elapsed = 0.0  # running total of sweep times, avoids re-summing `times`
    for _ in progprint_xrange(N_samples, perline=25):
        # Update the model; only the resampling itself is timed.
        tic = time.time()
        test_model.resample_model()
        sweep_time = time.time() - tic
        times.append(sweep_time)
        elapsed += sweep_time

        samples.append(copy.deepcopy(test_model.get_parameters()))

        # Held-out log likelihood after this sweep.
        hlls.append(
            test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test))

        # Check if time limit has been exceeded
        if elapsed > time_limit:
            break

    # Get cumulative timestamps
    timestamps = np.cumsum(times)
    lps = np.array(lps)
    hlls = np.array(hlls)

    # Make results object
    results = Results(samples, timestamps, lps, hlls)

    # Save the Gibbs samples
    with gzip.open(output_path, 'w') as f:
        print("Saving Gibbs samples to ", output_path)
        pickle.dump(results, f, protocol=-1)

    return results
def fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path, standard_model=None):
    """
    Fit a continuous-time network Hawkes model by Gibbs sampling, with live
    plotting of the weights and on-disk caching.

    If ``output_path + ".gibbs.pkl"`` exists, its ``(samples, timestamps)``
    tuple is loaded and returned.  Otherwise 100 Gibbs sweeps are run; each
    sweep's sample is pickled to ``output_path + ".gibbs.itrNNNN.pkl"`` and
    the full list is pickled to ``output_path + ".gibbs.pkl"`` at the end.

    :param S: data handed to ``convert_discrete_to_continuous``.
    :param K: number of processes (model size and length of the ``c`` vector).
    :param C: unused by this function.
    :param dt: second argument of ``convert_discrete_to_continuous``.
    :param dt_max: ``dt_max`` keyword of the model constructor.
    :param output_path: prefix for all pickle files written or read.
    :param standard_model: optional model passed to
        ``initialize_with_standard_model``.
    :return: ``(samples, timestamps)``; ``timestamps`` are process CPU-time
        readings taken after each sweep.
    """
    # Local import: stands in for the Python-2-only ``cPickle`` module.
    import pickle

    results_path = output_path + ".gibbs.pkl"

    # Check for existing Gibbs results
    if os.path.exists(results_path):
        # Pickle data is binary, so the file must be opened in "rb" mode.
        with open(results_path, "rb") as f:
            print("Loading Gibbs results from ", results_path)
            # NOTE: unpickling executes arbitrary code; only load trusted files.
            (samples, timestamps) = pickle.load(f)
        return samples, timestamps

    print("Fitting the data with a network Hawkes model using Gibbs sampling")
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

    # Choose v = kappa / E_W, i.e. the scale at which a Gamma(kappa, v) weight
    # would have mean E_W.  Only "v" is passed on; kappa itself is not.
    E_W = 0.2
    kappa = 10.0
    E_v = kappa / E_W
    network_hypers = {
        "C": 1,
        "c": np.zeros(K).astype(int),  # np.int was only an alias of int
        "p": 0.25,
        "v": E_v,
        "allow_self_connections": False,
    }

    test_model = ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, network_hypers=network_hypers)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Interactive figure that is refreshed on every sweep.
    plt.ion()
    im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.025)
    plt.pause(0.001)

    # Gibbs sample
    N_samples = 100
    samples = []
    lps = [test_model.log_probability()]
    timestamps = []
    for itr in range(N_samples):
        print("Iteration ", itr, "\tLL: ", lps[-1])
        im.set_data(test_model.weight_model.W_effective)
        plt.pause(0.001)

        # Log probability is recorded before the sweep it is printed with.
        lps.append(test_model.log_probability())
        samples.append(test_model.resample_and_copy())
        # time.clock() was removed in Python 3.8; process_time() is its
        # documented CPU-time replacement.
        timestamps.append(time.process_time())

        print(test_model.network.p)

        # Save this sample
        with open(output_path + ".gibbs.itr%04d.pkl" % itr, "wb") as f:
            pickle.dump(samples[-1], f, protocol=-1)

    # Save the Gibbs samples
    with open(results_path, "wb") as f:
        print("Saving Gibbs samples to ", results_path)
        pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
def fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path,
                                standard_model=None):
    """
    Gibbs-sample a continuous-time network Hawkes model, refreshing a weight
    plot every sweep and caching samples on disk.

    A previous run is reused when ``output_path + ".gibbs.pkl"`` exists.
    Otherwise 100 sweeps are performed, each sample is pickled to
    ``output_path + ".gibbs.itrNNNN.pkl"``, and the collected
    ``(samples, timestamps)`` tuple is pickled to
    ``output_path + ".gibbs.pkl"``.

    :param S: data handed to ``convert_discrete_to_continuous``.
    :param K: number of processes.
    :param C: unused by this function.
    :param dt: second argument of ``convert_discrete_to_continuous``.
    :param dt_max: ``dt_max`` keyword of the model constructor.
    :param output_path: prefix of every pickle file read or written.
    :param standard_model: optional model passed to
        ``initialize_with_standard_model``.
    :return: ``(samples, timestamps)``.
    """
    # Local import: stands in for the Python-2-only ``cPickle`` module.
    import pickle

    cache_file = output_path + ".gibbs.pkl"

    # Check for existing Gibbs results
    if os.path.exists(cache_file):
        # Binary mode is required for pickle data.
        with open(cache_file, 'rb') as f:
            print("Loading Gibbs results from ", cache_file)
            # NOTE: unpickling executes arbitrary code; only load trusted files.
            (samples, timestamps) = pickle.load(f)
        return samples, timestamps

    print("Fitting the data with a network Hawkes model using Gibbs sampling")
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

    test_model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=dt_max, network_hypers=_gibbs_network_hypers(K))
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    plt.ion()
    im = plot_network(test_model.weight_model.A,
                      test_model.weight_model.W,
                      vmax=0.025)
    plt.pause(0.001)

    # Gibbs sample
    N_samples = 100
    samples = []
    timestamps = []
    # Log probability of the current state; printed at the start of a sweep.
    last_lp = test_model.log_probability()
    for itr in range(N_samples):
        print("Iteration ", itr, "\tLL: ", last_lp)
        im.set_data(test_model.weight_model.W_effective)
        plt.pause(0.001)

        last_lp = test_model.log_probability()
        samples.append(test_model.resample_and_copy())
        # Replaces time.clock(), which no longer exists in Python >= 3.8.
        timestamps.append(time.process_time())

        print(test_model.network.p)

        # Save this sample
        with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'wb') as f:
            pickle.dump(samples[-1], f, protocol=-1)

    # Save the Gibbs samples
    with open(cache_file, 'wb') as f:
        print("Saving Gibbs samples to ", cache_file)
        pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps


def _gibbs_network_hypers(K):
    """Return the fixed network hyperparameters used by the Gibbs fit."""
    # v = kappa / E_W: the scale at which a Gamma(kappa, v) weight would have
    # mean E_W.  kappa itself is not part of the returned dict.
    E_W = 0.2
    kappa = 10.
    return {
        'C': 1,
        "c": np.zeros(K).astype(int),  # np.int was only an alias of int
        "p": 0.25,
        "v": kappa / E_W,
        'allow_self_connections': False,
    }
def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max,
                                output_path,
                                model_args=None, standard_model=None,
                                N_samples=100, time_limit=8*60*60):
    """
    Run Gibbs sampling for a continuous-time network Hawkes model and cache
    the results in a gzipped pickle.

    An existing ``output_path`` short-circuits the fit: the pickled object is
    returned as is.  Otherwise up to ``N_samples`` sweeps are run, stopping
    early when the accumulated sweep time exceeds ``time_limit`` seconds.

    :param S: training data; ``S.shape[1]`` gives the number of processes.
    :param S_test: held-out data used for the held-out log likelihood.
    :param dt: second argument of ``convert_discrete_to_continuous``.
    :param dt_max: ``dt_max`` keyword of the model constructor.
    :param output_path: gzipped pickle used as cache.
    :param model_args: extra keyword arguments for the model constructor
        (default: none).
    :param standard_model: optional model passed to
        ``initialize_with_standard_model``.
    :param N_samples: maximum number of sweeps.
    :param time_limit: time budget in seconds.
    :return: the cached object, or ``Results(samples, timestamps, lps, hlls)``
        where ``lps`` contains only the pre-sampling log probability.
    """
    # Local import: stands in for the Python-2-only ``cPickle`` module.
    import pickle

    # Avoid a shared mutable default argument.
    model_args = {} if model_args is None else model_args

    # Check for existing Gibbs results (before converting any data)
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print("Loading Gibbs results from ", output_path)
            # NOTE: unpickling executes arbitrary code; only load trusted files.
            results = pickle.load(f)
        return results

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    print("Fitting the data with a continuous time network Hawkes model using Gibbs sampling")

    test_model = ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Gibbs sample
    samples = []
    lps = [test_model.log_probability()]
    hlls = [test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)]
    times = [0]
    total_time = 0.0  # running sum of `times`
    for _ in progprint_xrange(N_samples, perline=25):
        # Update the model (only this call is timed)
        tic = time.time()
        test_model.resample_model()
        toc = time.time() - tic
        times.append(toc)
        total_time += toc

        samples.append(copy.deepcopy(test_model.get_parameters()))

        # Compute heldout log likelihood
        hlls.append(test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test))

        # Check if time limit has been exceeded
        if total_time > time_limit:
            break

    # Get cumulative timestamps
    timestamps = np.cumsum(times)
    lps = np.array(lps)
    hlls = np.array(hlls)

    # Make results object
    results = Results(samples, timestamps, lps, hlls)

    # Save the Gibbs samples
    with gzip.open(output_path, 'w') as f:
        print("Saving Gibbs samples to ", output_path)
        pickle.dump(results, f, protocol=-1)

    return results
### Fit the models
# For each candidate network prior, either reload a cached result from
# results/hippocampus/run002/<name>.pkl or set up a fresh model for fitting.
N_samples = 1000
results = []
results_dir = os.path.join("results", "hippocampus", "run002")

for network, name in zip(networks, names):
    results_file = os.path.join(results_dir, "%s.pkl" % name)
    if os.path.exists(results_file):
        # pickle.load needs a binary stream; text mode ("r") fails on
        # Python 3.
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(results_file, "rb") as f:
            result = pickle.load(f)
        results.append(result)
        continue

    print("Fitting model with ", name, " network.")
    model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=1., network=network)
    model.add_data(S_train, C_train, T_train)

    model.resample_model()

    # Add the test data and then remove it. That way we can
    # efficiently compute its predictive log likelihood
    model.add_data(S_test, C_test, T - T_train)
    test_data = model.data_list.pop()

    ### Fit the model
    # Initial training and predictive log likelihoods; the remaining lists
    # start empty.
    lls = [model.log_likelihood()]
    plls = [model.log_likelihood(test_data)]
    Weffs = []
    Ps = []
def test_geweke():
    """
    Geweke-style check of the continuous-time network Hawkes Gibbs sampler.

    Starting from data generated by the model, alternate between resampling
    the model given the current data and regenerating the data from the
    current model.  The collected parameter samples are then compared
    visually (histogram vs. density) against distributions built from the
    model's own hyperparameters.

    :return: None.  Prints sample means and shows matplotlib figures.
    """
    K = 1
    T = 50.0
    dt_max = 3.0

    # np.int was removed from NumPy; the builtin int is the same dtype.
    # Background hyperparameters are not passed to the model: the prior
    # plotted for lambda0 is read back from model.bias_model below.
    network_hypers = {'c': np.zeros(K, dtype=int), 'p': 0.5,
                      'kappa': 10.0, 'v': 10*3.0}
    model = ContinuousTimeNetworkHawkesModel(K=K, dt_max=dt_max,
                                             network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 1000
    samples = []
    lps = []
    for _ in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step: discard the current dataset and draw a new one
        model.data_list.pop()
        model.generate(T=T)

    # Collect the sampled parameters
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    mu_samples = np.array([s.impulse_model.mu for s in samples])
    tau_samples = np.array([s.impulse_model.tau for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    # offset = 0 keeps every sample (no burn-in is discarded)
    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    mu_mean = mu_samples[offset:, ...].mean(axis=0)
    tau_mean = tau_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print("A mean:        ", A_mean)
    print("W mean:        ", W_mean)
    print("mu mean:       ", mu_mean)
    print("tau mean:      ", tau_mean)
    print("lambda0 mean:  ", lambda0_mean)

    # Plot the log likelihood trace over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples.
    # `density=True` replaces the removed `normed=True` keyword of hist().
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1./model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0], bins=50, alpha=0.5, density=True)
    bincenters = 0.5*(bins[1:]+bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('lam0')
    plt.ylabel('p(lam0)')

    print("Expected p(A):  ", model.network.P)
    print("Empirical p(A): ", A_samples.mean(axis=0))

    # Plot the histogram of weight samples, restricted to sweeps where the
    # single connection was present (A == 1)
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1./model.network.v[0, 0])

    if np.sum(Aeq1) > 0:
        _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0], bins=50, alpha=0.5, density=True)
        bincenters = 0.5*(bins[1:]+bins[:-1])
        plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('W')
    plt.ylabel('p(W | A=1)')

    # Plot the histogram of impulse precisions
    plt.figure()
    p_tau = gamma(model.impulse_model.alpha_0, scale=1./model.impulse_model.beta_0)
    _, bins, _ = plt.hist(tau_samples[:, 0, 0], bins=50, alpha=0.5, density=True)
    bincenters = 0.5*(bins[1:]+bins[:-1])
    plt.plot(bincenters, p_tau.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('tau')
    plt.ylabel('p(tau)')

    # Plot the histogram of impulse means against a Student-t density
    plt.figure()
    p_mu = t(df=2*model.impulse_model.alpha_0,
             loc=model.impulse_model.mu_0,
             scale=np.sqrt(model.impulse_model.beta_0 /
                           (model.impulse_model.alpha_0*model.impulse_model.lmbda_0)))

    _, bins, _ = plt.hist(mu_samples[:, 0, 0], bins=50, alpha=0.5, density=True)
    bincenters = 0.5*(bins[1:]+bins[:-1])
    plt.plot(bincenters, p_mu.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('mu')
    plt.ylabel('p(mu)')

    plt.show()
### Fit the models
# Reload cached per-network results when available, otherwise prepare a new
# model for fitting.
import pickle  # stands in for the Python-2-only cPickle module

N_samples = 1000
results = []
results_dir = os.path.join("results", "hippocampus", "run002")

for network, name in zip(networks, names):
    results_file = os.path.join(results_dir, "%s.pkl" % name)
    if os.path.exists(results_file):
        # Pickle data is binary; open the cache in "rb" mode.
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(results_file, "rb") as f:
            result = pickle.load(f)
        results.append(result)
        continue

    print("Fitting model with ", name, " network.")
    model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=1., network=network)
    model.add_data(S_train, C_train, T_train)

    model.resample_model()

    # Add the test data and then remove it. That way we can
    # efficiently compute its predictive log likelihood
    model.add_data(S_test, C_test, T - T_train)
    test_data = model.data_list.pop()

    ### Fit the model
    # Initial training and predictive log likelihoods; the remaining lists
    # start empty.
    lls = [model.log_likelihood()]
    plls = [model.log_likelihood(test_data)]
    Weffs = []
    Ps = []
def test_geweke():
    """
    Geweke-style check of the continuous-time network Hawkes Gibbs sampler.

    Data are generated from the model, then the sampler alternates between
    resampling the model and regenerating the data.  Histograms of the
    collected parameter samples are overlaid with densities built from the
    model's own hyperparameters.

    :return: None.  Prints sample means and shows matplotlib figures.
    """
    K = 1
    T = 50.0
    dt_max = 3.0

    # np.int was removed from NumPy; the builtin int is the same dtype.
    # Background hyperparameters are not passed to the model: the lambda0
    # prior plotted below is read back from model.bias_model.
    network_hypers = {"c": np.zeros(K, dtype=int), "p": 0.5, "kappa": 10.0, "v": 10 * 3.0}
    model = ContinuousTimeNetworkHawkesModel(K=K, dt_max=dt_max, network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 1000
    samples = []
    lps = []
    for _ in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step: replace the dataset with a fresh draw from the model
        model.data_list.pop()
        model.generate(T=T)

    # Stack the sampled parameters (offset = 0, so nothing is discarded)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    mu_samples = np.array([s.impulse_model.mu for s in samples])
    tau_samples = np.array([s.impulse_model.tau for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = 0
    print("A mean:        ", A_samples[offset:, ...].mean(axis=0))
    print("W mean:        ", W_samples[offset:, ...].mean(axis=0))
    print("mu mean:       ", mu_samples[offset:, ...].mean(axis=0))
    print("tau mean:      ", tau_samples[offset:, ...].mean(axis=0))
    print("lambda0 mean:  ", lambda0_samples[offset:, ...].mean(axis=0))

    # Plot the log likelihood trace over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Bias samples against a gamma density built from the bias hyperparameters
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1.0 / model.bias_model.beta)
    _plot_samples_vs_density(lambda0_samples[:, 0], p_lmbda0, "lam0", "p(lam0)")

    print("Expected p(A):  ", model.network.P)
    print("Empirical p(A): ", A_samples.mean(axis=0))

    # Weight samples, restricted to sweeps where the connection was present
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1.0 / model.network.v[0, 0])
    _plot_samples_vs_density(W_samples[Aeq1, 0, 0], p_W1, "W", "p(W | A=1)")

    # Impulse precisions
    p_tau = gamma(model.impulse_model.alpha_0, scale=1.0 / model.impulse_model.beta_0)
    _plot_samples_vs_density(tau_samples[:, 0, 0], p_tau, "tau", "p(tau)")

    # Impulse means against a Student-t density
    p_mu = t(
        df=2 * model.impulse_model.alpha_0,
        loc=model.impulse_model.mu_0,
        scale=np.sqrt(model.impulse_model.beta_0 / (model.impulse_model.alpha_0 * model.impulse_model.lmbda_0)),
    )
    _plot_samples_vs_density(mu_samples[:, 0, 0], p_mu, "mu", "p(mu)")

    plt.show()


def _plot_samples_vs_density(values, dist, xlabel, ylabel):
    """Open a new figure with a density histogram of ``values`` and ``dist.pdf`` overlaid."""
    plt.figure()
    # With no samples (e.g. the connection was never present) only the
    # labelled, empty axes are produced.
    if len(values) > 0:
        # `density=True` replaces the removed `normed=True` keyword of hist().
        _, bins, _ = plt.hist(values, bins=50, alpha=0.5, density=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, dist.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)