def fit_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path, standard_model=None):
    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", 'rb') as f:
            print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
            (samples, timestamps) = cPickle.load(f)
    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        # test_model = DiscreteTimeNetworkHawkesModelGammaMixture(C=C, K=K, dt=dt, dt_max=dt_max, B=B,
        #                                                         alpha=1.0, beta=1.0/20.0)
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        network_hypers = {'C': C,
                          'alpha': 1.0, 'beta': 1.0/10.0,
                          'tau1': 1.0, 'tau0': 10.0,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=K, dt=dt, dt_max=dt_max,
            basis=test_basis,
            network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in xrange(N_samples):
            if itr % 1 == 0:  # print and redraw every iteration
                print "Iteration ", itr, "\tLL: ", lps[-1]
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.clock())

            # Save this sample (binary mode, since protocol=-1 is a binary format)
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'wb') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'wb') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
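# A minimal sketch (not part of the original experiment code) of one way to
# summarize the Gibbs output above: discard a burn-in prefix and average the
# effective weights over the remaining samples. This assumes, as in pyhawkes,
# that each sample is a model copy exposing weight_model.W_effective (= A * W).
def summarize_gibbs_samples(samples, burn_frac=0.5):
    """Posterior mean of the effective weight matrix, after burn-in."""
    n_burn = int(burn_frac * len(samples))
    W_samples = np.array([s.weight_model.W_effective for s in samples[n_burn:]])
    return W_samples.mean(axis=0)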
def demo(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.
    :return:
    """
    raise NotImplementedError("This example needs to be updated.")

    if seed is None:
        seed = np.random.randint(2**32)
    print("Setting seed to ", seed)
    np.random.seed(seed)

    C = 1       # Number of clusters in the true data
    K = 10      # Number of nodes
    T = 1000    # Number of time bins to simulate
    dt = 0.02   # Time bin size
    dt_max = 0.08
    B = 3       # Number of basis functions

    # Sample from a sparse network Hawkes model
    S, true_model = sample_from_network_hawkes(C, K, T, dt, dt_max, B)

    # Make a new model for inference
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=False)
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max + dt,
                                                 beta=1.0,
                                                 basis=test_basis,
                                                 allow_self_connections=True)
    test_model.add_data(S)

    # DEBUG: Initialize with the true parameters of the network Hawkes model
    # test_model.initialize_with_gibbs_model(true_model)

    test_model.fit_with_bfgs()

    print("W true:       ", true_model.weight_model.A * true_model.weight_model.W)
    print("lambda0 true: ", true_model.bias_model.lambda0)
    print("ll true:      ", true_model.log_likelihood())
    print("")
    print("W test:       ", test_model.W)
    print("lambda0 test  ", test_model.bias)
    print("ll test:      ", test_model.log_likelihood())

    plot_network(np.ones((K, K)), test_model.W, vmax=0.5)

    # Plot the rates
    plt.figure()
    for k in range(3):
        plt.subplot(3, 1, k + 1)
        plt.plot(np.arange(T) * dt, true_model.compute_rate(proc=k), '-b')
        plt.plot(np.arange(T) * dt, test_model.compute_rate(ks=k), '-r')

    plt.ioff()
    plt.show()
def fit_standard_hawkes_model_bfgs_noxv(S, K, dt, dt_max, output_path, W_max=None):
    """
    Fit a standard Hawkes model with BFGS, without cross-validation.

    :param S: spike count matrix (time bins x nodes)
    :return: the fit model and the time taken to fit it
    """
    # Check for existing results
    if os.path.exists(output_path + ".bfgs.pkl"):
        print "Existing BFGS results found. Loading from file."
        with open(output_path + ".bfgs.pkl", 'rb') as f:
            init_model, init_time = cPickle.load(f)
    else:
        print "Fitting the data with a standard Hawkes model"

        # We want the max W ~ 0.025 and the mean to be around 0.01
        # W ~ Gamma(alpha, beta) => E[W] = alpha/beta, so beta ~ 100 * alpha
        alpha = 1.1
        beta = alpha * 1.0 / 0.01

        # Make a model to initialize the parameters
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        init_model = DiscreteTimeStandardHawkesModel(
            K=K, dt=dt, dt_max=dt_max,
            alpha=alpha, beta=beta,
            basis=test_basis,
            allow_self_connections=False,
            W_max=W_max)
        init_model.add_data(S)

        # Initialize the background rates to their mean
        init_model.initialize_to_background_rate()

        start = time.clock()
        init_model.fit_with_bfgs()
        init_time = time.clock() - start

        # Save the model (sans data)
        with open(output_path + ".bfgs.pkl", 'wb') as f:
            print "Saving BFGS results to ", (output_path + ".bfgs.pkl")
            cPickle.dump((init_model, init_time), f, protocol=-1)

    return init_model, init_time
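# Hedged sketch of the prior arithmetic in the comment above: with the
# shape/rate parameterization W ~ Gamma(alpha, beta), E[W] = alpha / beta, so
# beta = alpha / 0.01 targets a prior mean weight of 0.01. The helper below is
# purely illustrative.
def gamma_prior_mean(alpha, beta):
    """Prior mean of a Gamma(shape=alpha, rate=beta) random weight."""
    return alpha / beta

# e.g. gamma_prior_mean(1.1, 1.1 / 0.01) == 0.01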
def fit_network_hawkes_svi(S, K, C, dt, dt_max, output_path,
                           standard_model=None,
                           N_iters=500, true_network=None):
    # Check for existing SVI results
    if os.path.exists(output_path + ".svi.pkl.gz"):
        with gzip.open(output_path + ".svi.pkl.gz", 'rb') as f:
            print "Loading SVI results from ", (output_path + ".svi.pkl.gz")
            (samples, timestamps) = cPickle.load(f)

    elif os.path.exists(output_path + ".svi.itr%04d.pkl" % (N_iters - 1)):
        with open(output_path + ".svi.itr%04d.pkl" % (N_iters - 1), 'rb') as f:
            print "Loading SVI results from ", (output_path + ".svi.itr%04d.pkl" % (N_iters - 1))
            sample = cPickle.load(f)
            samples = [sample]
            timestamps = None

    else:
        print "Fitting the data with a network Hawkes model using SVI"

        # Make a new model for inference
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)

        # Set the weight prior so that E[W] = kappa / E[v] = 0.01
        E_W = 0.01
        kappa = 10.
        E_v = kappa / E_W
        alpha = 10.
        beta = alpha / E_v

        # network_hypers = {'C': 2,
        #                   'kappa': kappa, 'alpha': alpha, 'beta': beta,
        #                   'p': 0.1, 'tau1': 1.0, 'tau0': 1.0,
        #                   'allow_self_connections': False}
        # test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max,
        #                                                         basis=test_basis,
        #                                                         network_hypers=network_hypers)
        network_hypers = {'C': 2,
                          'kappa': kappa, 'alpha': alpha, 'beta': beta,
                          'p': 0.8,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixtureSBM(
            K=K, dt=dt, dt_max=dt_max,
            basis=test_basis,
            network_hypers=network_hypers)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # TODO: Add the data in minibatches
        minibatchsize = 3000
        test_model.add_data(S)

        # Stochastic variational inference
        samples = []
        delay = 10.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        timestamps = []
        for itr in xrange(N_iters):
            if true_network is not None:
                # W_score = test_model.weight_model.expected_A()
                W_score = test_model.weight_model.expected_W()
                print "AUC: ", roc_auc_score(true_network.ravel(), W_score.ravel())
            print "SVI Iter: ", itr, "\tStepsize: ", stepsize[itr]
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.clock())

            # Save this sample
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the SVI samples
        with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
            print "Saving SVI samples to ", (output_path + ".svi.pkl.gz")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
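# Illustrative, hypothetical helper for the link-prediction score printed in
# the SVI loop above: compare a posterior score matrix (expected_W here;
# expected_A would be the more direct link probability) against the true
# adjacency with ROC AUC. Since self-connections are disallowed, the diagonal
# can optionally be masked out of the comparison.
def link_prediction_auc(true_network, score, mask_diagonal=True):
    from sklearn.metrics import roc_auc_score
    if mask_diagonal:
        off_diag = ~np.eye(true_network.shape[0], dtype=bool)
        return roc_auc_score(true_network[off_diag], score[off_diag])
    return roc_auc_score(true_network.ravel(), score.ravel())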
def fit_standard_hawkes_model_bfgs(S, K, dt, dt_max, output_path, W_max=None):
    """
    Fit a standard Hawkes model with BFGS, selecting the weight
    regularization (beta) by held-out cross-validation.

    :param S: spike count matrix (time bins x nodes)
    :return: the best model and the time taken to fit it
    """
    # Check for existing results
    if os.path.exists(output_path + ".bfgs.pkl"):
        print "Existing BFGS results found. Loading from file."
        with open(output_path + ".bfgs.pkl", 'rb') as f:
            init_model, init_time = cPickle.load(f)
    else:
        print "Fitting the data with a standard Hawkes model"

        # betas = np.logspace(-1, 1.3, num=1)
        # betas = [0.0]
        # We want the max W ~ 0.025 and the mean to be around 0.01
        # W ~ Gamma(alpha, beta) => E[W] = alpha/beta, so beta ~ 100 * alpha
        alpha = 1.1
        betas = [alpha * 1.0 / 0.01]

        init_models = []

        # Hold out the last 10,000 time bins for cross-validation
        xv_len = 10000
        init_len = S.shape[0] - xv_len
        S_init = S[:init_len, :]
        S_xv = S[init_len:init_len + xv_len, :]
        xv_ll = np.zeros(len(betas))

        # Make a model to initialize the parameters
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        init_model = DiscreteTimeStandardHawkesModel(
            K=K, dt=dt, dt_max=dt_max,
            alpha=alpha, beta=0.0,
            basis=test_basis,
            allow_self_connections=False,
            W_max=W_max)
        init_model.add_data(S_init)

        # Initialize the background rates to their mean
        init_model.initialize_to_background_rate()

        start = time.clock()
        for i, beta in enumerate(betas):
            print "Fitting with BFGS on first ", init_len, " time bins, ", \
                  "beta = ", beta, "W_max = ", W_max
            init_model.beta = beta
            init_model.fit_with_bfgs()
            init_models.append(init_model.copy_sample())

            # Compute the held-out likelihood on the xv data
            xv_ll[i] = init_model.heldout_log_likelihood(S_xv)
            if not np.isfinite(xv_ll[i]):
                xv_ll[i] = -np.inf

        init_time = time.clock() - start

        # Take the best model
        print "XV predictive log likelihoods: "
        for beta, ll in zip(betas, xv_ll):
            print "Beta: %.2f\tLL: %.2f" % (beta, ll)
        best_ind = np.argmax(xv_ll)
        print "Best beta: ", betas[best_ind]
        init_model = init_models[best_ind]

        if best_ind == 0 or best_ind == len(betas) - 1:
            print "WARNING: Best BFGS model was for extreme value of beta. " \
                  "Consider expanding the beta range."

        # Save the model (sans data)
        with open(output_path + ".bfgs.pkl", 'wb') as f:
            print "Saving BFGS results to ", (output_path + ".bfgs.pkl")
            cPickle.dump((init_model, init_time), f, protocol=-1)

    return init_model, init_time
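# If the WARNING above fires, a wider regularization grid may help. A minimal,
# hypothetical helper mirroring the commented-out np.logspace line above:
def make_beta_grid(lo_exp=-1.0, hi_exp=3.0, num=9):
    """Log-spaced candidate betas, e.g. 0.1 ... 1000 with the defaults."""
    return np.logspace(lo_exp, hi_exp, num=num)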
def fit_network_hawkes_svi(S, K, C, dt, dt_max, output_path,
                           standard_model=None, N_iters=500):
    # Check for existing SVI results
    # if os.path.exists(output_path + ".svi.pkl.gz"):
    #     with gzip.open(output_path + ".svi.pkl.gz", 'rb') as f:
    #         print "Loading SVI results from ", (output_path + ".svi.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    if os.path.exists(output_path + ".svi.itr%04d.pkl" % (N_iters - 1)):
        with open(output_path + ".svi.itr%04d.pkl" % (N_iters - 1), 'rb') as f:
            print "Loading SVI results from ", (output_path + ".svi.itr%04d.pkl" % (N_iters - 1))
            sample = cPickle.load(f)
            samples = [sample]
            timestamps = None
    else:
        print "Fitting the data with a network Hawkes model using SVI"

        # Make a new model for inference
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        network_hypers = {'C': C,
                          'alpha': 1.0, 'beta': 1.0/10.0,
                          'tau1': 1.0, 'tau0': 10.0,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
            K=K, dt=dt, dt_max=dt_max,
            basis=test_basis,
            network_hypers=network_hypers)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # Plot the block affiliations as a one-hot K x C assignment matrix
        plt.figure(2)
        KC = np.zeros((K, C))
        KC[np.arange(K), test_model.network.c] = 1.0
        im_clus = plt.imshow(KC,
                             interpolation="none", cmap="Greys",
                             aspect=float(C) / K)

        # TODO: Add the data in minibatches
        minibatchsize = 1000
        test_model.add_data(S)

        # Stochastic variational inference
        samples = []
        delay = 1.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        timestamps = []
        for itr in xrange(N_iters):
            print "SVI Iter: ", itr, "\tStepsize: ", stepsize[itr]
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.clock())

            if itr % 1 == 0:  # redraw every iteration
                plt.figure(1)
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

                plt.figure(2)
                im_clus.set_data(test_model.network.mf_m)
                plt.title("Iteration %d" % itr)
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the SVI samples
        # with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
        #     print "Saving SVI samples to ", (output_path + ".svi.pkl.gz")
        #     cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
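# Self-contained sketch of the block-affiliation matrix built above: a one-hot
# K x C assignment matrix with entry (k, c[k]) = 1. Hypothetical helper, shown
# only to clarify the indexing trick KC[np.arange(K), c] = 1.0.
def one_hot_assignments(c, C):
    """Rows are nodes, columns are clusters; row k is one-hot at cluster c[k]."""
    K = len(c)
    KC = np.zeros((K, C))
    KC[np.arange(K), c] = 1.0
    return KC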
def fit_network_hawkes_svi(S, K, C, dt, dt_max, output_path,
                           standard_model=None,
                           N_iters=100, true_network=None):
    """
    From Scott Linderman's experiments in
    https://github.com/slinderman/pyhawkes/tree/master/experiments
    """
    # Check for existing SVI results
    if os.path.exists(output_path + ".svi.pkl.gz"):
        with gzip.open(output_path + ".svi.pkl.gz", 'rb') as f:
            print("Loading SVI results from ", (output_path + ".svi.pkl.gz"))
            (samples, timestamps) = pickle.load(f)

    elif os.path.exists(output_path + ".svi.itr%04d.pkl" % (N_iters - 1)):
        with open(output_path + ".svi.itr%04d.pkl" % (N_iters - 1), 'rb') as f:
            print("Loading SVI results from ", (output_path + ".svi.itr%04d.pkl" % (N_iters - 1)))
            sample = pickle.load(f)
            samples = [sample]
            timestamps = None

    else:
        print("Fitting the data with a network Hawkes model using SVI")

        # ------------- Make a new model for inference
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)

        # Set the weight prior so that E[W] = kappa / E[v] = 0.01
        E_W = 0.01
        kappa = 10.
        E_v = kappa / E_W
        alpha = 10.
        beta = alpha / E_v
        network_hypers = {'C': 2,
                          'kappa': kappa, 'alpha': alpha, 'beta': beta,
                          'p': 0.8,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixtureSBM(
            K=K, dt=dt, dt_max=dt_max,
            basis=test_basis,
            network_hypers=network_hypers)

        # ------------- Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        minibatchsize = 3000
        test_model.add_data(S)

        # ------------- Stochastic variational inference with default algorithm hyperparameters
        samples = []
        delay = 10.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        timestamps = []
        for itr in range(N_iters):
            print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            # time.clock() was removed in Python 3.8; use time.process_time() instead
            timestamps.append(time.process_time())

            with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
                pickle.dump(samples[-1], f, protocol=-1)

        with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
            print("Saving SVI samples to ", (output_path + ".svi.pkl.gz"))
            pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
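# The SVI step sizes above follow a Robbins-Monro style schedule,
# stepsize[t] = (t + delay) ** (-forgetting_rate). The classic convergence
# conditions (steps sum to infinity, squared steps sum to a finite value)
# require forgetting_rate in (0.5, 1]; the loops above sit at the boundary
# value 0.5. A small illustrative reimplementation:
def svi_stepsizes(N_iters, delay=10.0, forgetting_rate=0.5):
    """Decaying step-size schedule used by the SVI loops above."""
    return (np.arange(N_iters) + delay) ** (-forgetting_rate)

# e.g. svi_stepsizes(100)[:3] -> array([0.31622777, 0.30151134, 0.28867513])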