def TrainInnerClusterEM(clusters, k_time=1, k_size=100):
    # Merge all the clusters: this learner is fit on a single long data stream.
    num_clusters = len(clusters)
    data = ConcatClusters(clusters, 0)

    # kernel_support is the length of time the kernel spans;
    # kernel_size is the number of discretisation steps (the granularity).
    em_learner = HawkesEM(kernel_support=k_time, kernel_size=k_size,
                          n_threads=8, verbose=True, tol=1e-5, max_iter=1000)
    em_learner.fit(data)

    # Alternative: train the EM learner on each cluster in turn, warm-starting
    # from the previous fit.
    """
    cluster_num = 0
    l = 0
    for cluster in clusters:
        if cluster_num % 10 == 0:
            # print out training progress
            s = f"Cluster: {cluster_num}/{num_clusters}"
            print(f"\r{' ' * l}\r{s}", end='', flush=True)
            l = len(s)
            print(em_learner.baseline)
            print(em_learner.kernel)
            print("==========")
        if cluster_num == 0:
            em_learner.fit(cluster)
        else:
            em_learner.fit(cluster, baseline_start=em_learner.baseline,
                           kernel_start=em_learner.kernel)
        cluster_num += 1
    """

    # Maybe add variation in kernel size later?
    # em_learner.score() evaluates goodness of fit (log-likelihood).
    print(f"\nEM Score: {em_learner.score()}")

    fig = plot_hawkes_kernels(em_learner)

    # TODO: remove this? Rebuild the estimated kernels as time-function kernels.
    t = np.linspace(0, k_time, endpoint=False, num=k_size)
    m = []
    for i in range(2):
        for j in range(2):
            m.append(max(em_learner.kernel[i][j]))

    # Normalise to make a stable Hawkes process. Note: spectral_radius is
    # computed here, but the kernels below are scaled by their L2 norm instead.
    spectral_radius = max(m)
    if spectral_radius < 1:
        spectral_radius = 1

    # Create a 2x2 array of time-function kernels.
    k = [[], []]
    for i in range(2):
        for j in range(2):
            k[i].append(HawkesKernelTimeFunc(
                t_values=t,
                y_values=em_learner.kernel[i][j] /
                np.linalg.norm(em_learner.kernel[i][j])))

    # return k, em_learner.baseline  # the kernels and the baseline
    return em_learner
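# A minimal sketch (not part of the original code) of how the time-function
# kernels and fitted baseline above could be fed back into tick's SimuHawkes
# to simulate from the estimated model. SimulateFromEM is a hypothetical
# helper; it assumes a 2-node process as fitted in TrainInnerClusterEM, and
# that the estimated kernels are stable (spectral radius < 1), otherwise the
# simulation may explode.
def SimulateFromEM(em_learner, k_time=1, k_size=100, end_time=1000):
    t = np.linspace(0, k_time, endpoint=False, num=k_size)
    simu = SimuHawkes(baseline=em_learner.baseline, end_time=end_time,
                      verbose=False)
    for i in range(2):
        for j in range(2):
            simu.set_kernel(i, j, HawkesKernelTimeFunc(
                t_values=t, y_values=em_learner.kernel[i][j]))
    simu.simulate()
    return simu.timestamps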
def TrainInnerClusterBasis(clusters, k_time=1, k_size=100, num_kernels=2):
    num_clusters = len(clusters)
    #data = ConcatClusters(clusters, 0)
    l = 0

    # kernel_support is the length of time the kernels span;
    # kernel_size is the number of discretisation steps (the granularity).
    basis_learner = HawkesBasisKernels(kernel_support=k_time,
                                       kernel_size=k_size,
                                       n_basis=num_kernels, C=1e-3,
                                       n_threads=8, verbose=False,
                                       ode_tol=1e-5, max_iter=1000)

    # Train the basis learner on each cluster, warm-starting from the
    # previous fit.
    cluster_num = 0
    for cluster in clusters:
        if cluster_num % 10 == 0:
            # print out training progress
            s = f"Cluster: {cluster_num}/{num_clusters}"
            print(f"\r{' ' * l}\r{s}", end='', flush=True)
            l = len(s)
        if cluster_num == 0:
            basis_learner.fit(cluster)
        else:
            basis_learner.fit(cluster,
                              baseline_start=basis_learner.baseline,
                              amplitudes_start=basis_learner.amplitudes,
                              basis_kernels_start=basis_learner.basis_kernels)
        cluster_num += 1

    # Alternative: fit once on the concatenated data.
    #basis_learner = HawkesBasisKernels(kernel_support=k_time,
    #                                   kernel_size=k_size,
    #                                   n_basis=num_kernels, C=1e-3,
    #                                   n_threads=8, verbose=True,
    #                                   ode_tol=1e-5, max_iter=1000)
    #basis_learner.fit(data)

    # Maybe add variation in kernel size later?
    # use basis_learner.score() to evaluate goodness, if supported
    #print(f"\nScore: {basis_learner.score()}")

    # TODO: remove this?
    fig = plot_hawkes_kernels(basis_learner)

    print(basis_learner.basis_kernels)
    print(basis_learner.amplitudes)
    print(basis_learner.baseline)

    return None
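# Hedged sketch (not part of the original code): the full kernel matrix can
# be reconstructed from the basis decomposition printed above,
# phi_ij(t) = sum_d amplitudes[i, j, d] * basis_kernels[d, t].
# Assumes the fitted learner exposes amplitudes with shape
# (n_nodes, n_nodes, n_basis) and basis_kernels with shape
# (n_basis, kernel_size); ReconstructBasisKernels is a hypothetical helper.
def ReconstructBasisKernels(basis_learner):
    # result has shape (n_nodes, n_nodes, kernel_size)
    return np.einsum('ijd,dt->ijt', basis_learner.amplitudes,
                     basis_learner.basis_kernels)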
def test_plot_hawkes_kernels(self):
    """Test plot_hawkes_kernels rendering given a fitted Hawkes learner
    """
    decays = np.array([1.5, 3.5])
    hawkes_sumexp = HawkesSumExpKern(decays, max_iter=0)

    # We set some specific coeffs to be free from any future learner
    # modifications.
    # With 0 iterations and coeffs as the start point, it should remain there.
    coeffs = np.array(
        [0.99, 0.99, 0.55, 0.37, 0.39, 0.16, 0.63, 0.49, 0.49, 0.30])
    hawkes_sumexp.fit(self.hawkes_simu.timestamps, start=coeffs)

    n_points = 10
    for support in [None, 4]:
        fig = plot_hawkes_kernels(hawkes_sumexp, hawkes=self.hawkes_simu,
                                  show=False, n_points=n_points,
                                  support=support)

        if support is None:
            max_support = hawkes_sumexp.get_kernel_supports().max() * 1.2
        else:
            max_support = support

        for i, j in itertools.product(range(self.n_nodes), repeat=2):
            index = i * self.n_nodes + j
            ax = fig.axes[index]

            ax_t_axis, ax_estimated_kernel = ax.lines[0].get_xydata().T
            t_axis = np.linspace(0, max_support, n_points)
            np.testing.assert_array_equal(ax_t_axis, t_axis)

            estimated_kernel = hawkes_sumexp.get_kernel_values(i, j, t_axis)
            np.testing.assert_array_equal(ax_estimated_kernel,
                                          estimated_kernel)

            _, ax_true_kernel = ax.lines[1].get_xydata().T
            true_kernel = self.hawkes_simu.kernels[i, j].get_values(t_axis)
            np.testing.assert_array_equal(ax_true_kernel, true_kernel)
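# Hedged note (not part of the original test): the 10 coefficients above
# decompose as n_nodes baseline terms plus n_nodes * n_nodes * n_decays
# adjacency amplitudes, i.e. 2 + 2 * 2 * 2 = 10 for this 2-node, 2-decay
# model, which is why the start vector has exactly 10 entries.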
em_2 = HawkesEM(kernel_discretization=kern_d, max_iter=10000, tol=1e-5,
                verbose=True, n_threads=-1)
em_2.fit(multi_2_timestamps)
em_2_baseline = em_2.baseline
em_2_kernel = em_2.kernel
em_2_score = em_2.score()

# %% Show head of kernels - HawkesEM
print(pd.DataFrame({'0': em_1_kernel[0, 0], 't0': em_2_kernel[0, 0]}).head(10))

# %% Plot HawkesEM 1
plot_hawkes_kernels(em_1, hawkes=hawkes_m1)

# %% [markdown]
# ![Tick_Figure_3_0_EM_fit.png](attachment:e79e4b3e-595b-42bd-b622-748c42b42725.png)

# %% Plot HawkesEM 1 - Log
plot_hawkes_kernels(em_1, hawkes=hawkes_m1, log_scale=True)

# %% [markdown]
# ![Tick_Figure_4_0_EM_fit_log.png](attachment:ea3e1e50-2b8d-4856-819e-ed95fdcc4425.png)

# %% Plot HawkesEM 2
plot_hawkes_kernels(em_2, hawkes=hawkes_m2)

# %% [markdown]
# ![Tick_Figure_5_t0_EM_fit.png](attachment:634ad445-9c32-4bc2-aaa6-bb76c5913b0c.png)
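# %% Compare EM scores (hedged sketch, not in the original notebook; assumes
# em_1_score was computed in an earlier cell the same way as em_2_score above;
# a higher log-likelihood means a better in-sample fit)
print(f"EM 1 score: {em_1_score}")
print(f"EM 2 score: {em_2_score}")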
        kernel = HawkesKernelTimeFunc(t_values=t_values, y_values=y_values)
        hawkes.set_kernel(i, j, kernel)

hawkes.end_time = end_time
hawkes.simulate()
ticks = hawkes.timestamps

# And then perform estimation with two basis kernels
kernel_support = 20
n_basis = 2

em = HawkesBasisKernels(kernel_support, n_basis=n_basis,
                        kernel_size=kernel_size, C=C, n_threads=4,
                        max_iter=max_iter, verbose=False, ode_tol=1e-5)
em.fit(ticks)

fig = plot_hawkes_kernels(em, hawkes=hawkes, support=19.9, show=False)
for ax in fig.axes:
    ax.set_ylim([0, 0.025])

# Compare the recovered basis kernels with the true ones (order matched).
fig = plot_basis_kernels(em, basis_kernels=[g2, g1], show=False)
for ax in fig.axes:
    ax.set_ylim([0, 0.5])

plt.show()
from tick.inference import HawkesSumExpKern

end_time = 1000
n_realizations = 10

decays = [.5, 2., 6.]
baseline = [0.12, 0.07]
adjacency = [[[0, .1, .4], [.2, 0., .2]],
             [[0, 0, 0], [.6, .3, 0]]]

hawkes_exp_kernels = SimuHawkesSumExpKernels(adjacency=adjacency,
                                             decays=decays,
                                             baseline=baseline,
                                             end_time=end_time,
                                             verbose=False, seed=1039)

multi = SimuHawkesMulti(hawkes_exp_kernels, n_simulations=n_realizations)
multi.end_time = [(i + 1) / 10 * end_time for i in range(n_realizations)]
multi.simulate()

learner = HawkesSumExpKern(decays, penalty='elasticnet',
                           elastic_net_ratio=0.8)
learner.fit(multi.timestamps)

fig = plot_hawkes_kernels(learner, hawkes=hawkes_exp_kernels, show=False)
for ax in fig.axes:
    ax.set_ylim([0., 1.])

plt.show()
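# Note (hedged, from tick's documented convention): elastic_net_ratio mixes
# the two penalties, with ratio = 0 giving pure l2 (ridge) and ratio = 1 pure
# l1 (lasso), so the 0.8 above weights the penalty mostly towards l1.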
def TrainInnerTimestampsExp2(clusters, num_decays=2000, decay_low=-10,
                             decay_high=10, e=10):
    best_score = -1e100
    print(f"Training on {len(clusters)} clusters")

    # Round num_decays down to a perfect fourth power so every entry of the
    # 2x2 decay matrix gets its own grid of candidate values.
    unique_decays = int(num_decays**(1.0 / 4))
    num_decays = unique_decays**4
    decay_candidates = np.logspace(decay_low, decay_high, unique_decays,
                                   dtype=np.dtype('d'))

    print(f"Decay Range: {decay_candidates[0]} -> {decay_candidates[-1]}")
    print(f"{unique_decays} unique decays. {num_decays} total")

    best_learner = None
    best_decay = None
    score_list = np.zeros(num_decays)

    # Exponential kernel: beta * exp(-beta * t), scaled by the adjacency.
    l = 0
    floaterrors = 0
    baseline_errors = 0
    for i in range(num_decays):
        s = f"Decay {i} ({format(100 / num_decays * i, '.2f')}% done)"
        print(f"{' ' * l}\r{s}\r", end='', flush=True)
        l = len(s)

        # Decode index i as a base-unique_decays number: one digit per entry
        # of the 2x2 decay matrix.
        decay = np.ones((2, 2))
        decay[0][0] = decay_candidates[int(i / (unique_decays**3)) % unique_decays]
        decay[0][1] = decay_candidates[int(i / (unique_decays**2)) % unique_decays]
        decay[1][0] = decay_candidates[int(i / (unique_decays**1)) % unique_decays]
        decay[1][1] = decay_candidates[int(i) % unique_decays]

        try:
            # e is the l2 penalty strength C
            learner = HawkesExpKern(decay, penalty='l2', C=e, max_iter=1000,
                                    solver='agd', tol=1e-5)
            learner.fit(clusters)
            hawkes_score = learner.score()

            # ensure there is a non-zero baseline
            numb = 0
            for b in learner.baseline:
                if b > 0:
                    numb += 1
            if numb == 0:
                baseline_errors += 1
                continue

            # record the score for plotting
            score_list[i] = hawkes_score

            # record the best
            if hawkes_score > best_score:
                best_score = hawkes_score
                best_learner = learner
                best_decay = decay
        except ZeroDivisionError:
            floaterrors += 1
            continue

    # create a score plot
    plt.plot(score_list)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Decay Scores')
    plt.grid(True)
    plt.show()

    print(f"\nTraining Done")
    print(f"Float Errors: {floaterrors} ({100 / num_decays * floaterrors}%)")
    print(f"Baseline Errors: {baseline_errors} "
          f"({100 / num_decays * baseline_errors}%)")
    successes = num_decays - floaterrors - baseline_errors
    print(f"==========\nSuccessful Results: {successes} "
          f"({100 / num_decays * successes}%)\n==========\n")
    print(f"\nBest Score: {best_score}")
    print(f"Best Decay: {best_decay}")

    plot_hawkes_kernels(best_learner)
    print(f"Adjacency: {best_learner.adjacency}")
    print(f"Baseline: {best_learner.baseline}")
    print(f"Coeffs: {best_learner.coeffs}")

    # activate this for residuals (warning: it is really slow)
    cat_clusters = ConcatClusters(clusters, 0)
    step = 0.1
    residuals = goodness_of_fit_par(best_learner, cat_clusters, step,
                                    integrate.simps)
    plot_resid(residuals, 2, 1)

    return best_learner.adjacency, best_learner.baseline, best_decay
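# Equivalent, arguably clearer enumeration of the 2x2 decay grid using
# itertools.product instead of the mixed-radix indexing above; a sketch for
# illustration, not the original implementation (iter_decay_grid is a
# hypothetical helper).
import itertools

def iter_decay_grid(decay_candidates):
    for d00, d01, d10, d11 in itertools.product(decay_candidates, repeat=4):
        yield np.array([[d00, d01], [d10, d11]])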
def TrainInnerClusterExp(clusters, num_decays=2000, decay_low=-10,
                         decay_high=10):
    data = ConcatClusters(clusters, 0)
    best_score = -1e100

    # Decays for a multi-dimensional process. Here the same candidate value is
    # used for every entry of the 2x2 decay matrix; see TrainInnerTimestampsExp2
    # for a grid with per-entry decays.
    decay_candidates = np.logspace(decay_low, decay_high, num_decays,
                                   dtype=np.dtype('d'))

    print(f"Training on {len(clusters)} clusters")
    print(f"Decay Range: {decay_candidates[0]} -> {decay_candidates[-1]}")

    best_learner = None
    best_decay = decay_candidates[0]
    score_list = np.zeros(num_decays)

    # Exponential kernel: beta * exp(-beta * t), scaled by the adjacency.
    l = 0
    floaterrors = 0
    baseline_errors = 0
    for i, decay in enumerate(decay_candidates):
        decay = decay * np.ones((2, 2))
        try:
            # Might need a hyperbolic kernel? The exponential one seems to get
            # too excited and decay too slowly; only small decay values seem
            # to produce sensible fits.
            learner = HawkesExpKern(decay, penalty='l2', C=1000,
                                    max_iter=1000, solver='agd', tol=1e-3)

            ### Penalties tried
            # l1         - raises 0-step errors
            # l2         - runs, but heavily favours high decay values that
            #              produce nonsense kernels
            # elasticnet - (elastic_net_ratio, default 0.95) values closer to
            #              0 work better (more l2 weight), otherwise it raises
            #              step errors; results still similar to l2
            # nuclear    - basically the same
            # none       - not applicable here
            ### Solvers tried
            # agd  - all penalties favour very high decays, i.e. essentially
            #        random event generation
            # gd   - basically the same
            # bfgs - does weird things, but is quick
            # svrg

            # Note: learner is freshly constructed here, so learner.coeffs is
            # not yet set and this is equivalent to the default starting point.
            learner.fit(data, start=learner.coeffs)

            # Alternative: fit cluster by cluster, warm-starting each time.
            """
            cluster_num = 0
            for cluster in clusters:
                if cluster_num % 100 == 0:
                    # print out training progress
                    s = f"It: {i}, Decay: {decay[0]}, Cluster: {cluster_num}"
                    print(f"\r{' ' * l}\r{s}", end='', flush=True)
                    l = len(s)
                learner.fit(cluster, start=learner.coeffs)
                cluster_num += 1
            """

            hawkes_score = learner.score()

            # ensure there is a non-zero baseline
            numb = 0
            for b in learner.baseline:
                if b > 0:
                    numb += 1
            if numb == 0:
                baseline_errors += 1
                continue

            # record the score for plotting
            score_list[i] = hawkes_score

            # record the best
            if hawkes_score > best_score:
                best_score = hawkes_score
                best_learner = learner
                best_decay = decay

            #step = 0.01
            #residuals = goodness_of_fit_par(learner, data, step,
            #                                integrate.simps)
            #plot_resid(residuals, 2, 1)
        except ZeroDivisionError:
            floaterrors += 1
            continue

    # create a score plot
    plt.plot(decay_candidates, score_list)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Decay Scores')
    plt.grid(True)
    plt.show()

    print(f"\nTraining Done")
    print(f"Float Errors: {floaterrors} ({100 / num_decays * floaterrors}%)")
    print(f"Baseline Errors: {baseline_errors} "
          f"({100 / num_decays * baseline_errors}%)")
    successes = num_decays - floaterrors - baseline_errors
    print(f"==========\nSuccessful Results: {successes} "
          f"({100 / num_decays * successes}%)\n==========\n")
    print(f"\nBest Score: {best_score}")
    print(f"Best Decay: {best_decay}")

    plot_hawkes_kernels(best_learner)
    print(f"Adjacency: {best_learner.adjacency}")
    print(f"Baseline: {best_learner.baseline}")
    print(f"Coeffs: {best_learner.coeffs}")

    #return best_learner.adjacency, best_learner.baseline, best_decay
    return best_learner, best_decay
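# Hedged sketch of the time-rescaling idea behind the external
# goodness_of_fit_par / plot_resid helpers used above: if the fitted model is
# correct, the compensator evaluated at the event times yields unit-rate
# exponential inter-arrivals. rescaled_residuals is a hypothetical helper; it
# assumes tick's estimated_intensity(events, intensity_track_step, end_time)
# method, and approximates the compensator by a Riemann sum on the tracked grid.
def rescaled_residuals(learner, events, end_time, step=0.01):
    intensities, times = learner.estimated_intensity(events, step, end_time)
    residuals = []
    for dim, ts in enumerate(events):
        # cumulative intensity (compensator) on the tracking grid
        compensator = np.cumsum(intensities[dim]) * step
        # compensator value at each event time; differences should be ~Exp(1)
        residuals.append(np.diff(np.interp(ts, times, compensator)))
    return residuals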
                             columns=event_titles)
    return estimated_intensity


# In[8]:

#[x for x in mdd_train.values]
mdd_train = [np.array(x) for x in mdd_train.values.tolist()]
#mdd_train = [int(x) for x in mdd_train]


# In[9]:

train_dim = len(mdd_train)
train_dim

mdd_train


# In[10]:

# Note: the first positional argument of HawkesEM is kernel_support (the
# length of time the kernel spans), so passing the number of training
# dimensions here is probably unintended.
learner = hawkes.HawkesEM(train_dim, n_threads=2, verbose=True, tol=1e-3)


# In[ ]:

learner.fit(mdd_train)


# In[ ]:

plot_hawkes_kernels(learner)


# In[ ]:
                   inter_mode=TimeFunction.InterConstRight, dt=0.1)
kernel1 = HawkesKernelTimeFunc(tf1)

t_values2 = np.linspace(0, 4, 20)
y_values2 = np.maximum(0., np.sin(t_values2) / 4)
tf2 = TimeFunction([t_values2, y_values2])
kernel2 = HawkesKernelTimeFunc(tf2)

baseline = np.array([0.1, 0.3])

hawkes = SimuHawkes(baseline=baseline, end_time=run_time, verbose=False,
                    seed=2334)
hawkes.set_kernel(0, 0, kernel1)
hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7))
hawkes.set_kernel(1, 1, kernel2)
hawkes.simulate()

em = HawkesEM(4, kernel_size=16, n_threads=8, verbose=False, tol=1e-3)
em.fit(hawkes.timestamps)

fig = plot_hawkes_kernels(em, hawkes=hawkes, show=False)
for ax in fig.axes:
    ax.set_ylim([0, 1])

plt.show()
from tick.inference import HawkesSumGaussians

end_time = 1000
n_nodes = 2
n_realizations = 10
n_gaussians = 5

timestamps_list = []

kernel_timefunction = HawkesKernelTimeFunc(
    t_values=np.array([0., .7, 2.5, 3., 4.]),
    y_values=np.array([.3, .03, .03, .2, 0.]))
kernels = [[HawkesKernelExp(.2, 2.), HawkesKernelPowerLaw(.2, .5, 1.3)],
           [HawkesKernel0(), kernel_timefunction]]

hawkes = SimuHawkes(baseline=[.5, .2], kernels=kernels, end_time=end_time,
                    verbose=False, seed=1039)

multi = SimuHawkesMulti(hawkes, n_simulations=n_realizations)
multi.simulate()

learner = HawkesSumGaussians(n_gaussians, max_iter=10)
learner.fit(multi.timestamps)

plot_hawkes_kernels(learner, hawkes=hawkes, support=4)
def test_HawkesEM():
    print('\n##############################')
    print('\nstarting: test_HawkesEM()\n')

    run_time = 30000

    t_values1 = np.array([0, 1, 1.5, 2., 3.5], dtype=float)
    y_values1 = np.array([0, 0.2, 0, 0.1, 0.], dtype=float)
    tf1 = TimeFunction([t_values1, y_values1],
                       inter_mode=TimeFunction.InterConstRight, dt=0.1)
    kernel1 = HawkesKernelTimeFunc(tf1)

    t_values2 = np.linspace(0, 4, 20)
    y_values2 = np.maximum(0., np.sin(t_values2) / 4)
    tf2 = TimeFunction([t_values2, y_values2])
    kernel2 = HawkesKernelTimeFunc(tf2)

    baseline = np.array([0.1, 0.3])

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    # Simulate 1000 independent realizations of the same process, each with
    # a fresh random seed.
    realizations = list()
    for i in range(0, 1000):
        print('')
        temp_seed = int(1000 + 1000 * random.random())
        print('i = ' + str(i) + ', temp_seed = ' + str(temp_seed))
        hawkes = SimuHawkes(baseline=baseline, end_time=run_time,
                            verbose=False, seed=temp_seed)
        hawkes.set_kernel(0, 0, kernel1)
        hawkes.set_kernel(0, 1, HawkesKernelExp(.5, .7))
        hawkes.set_kernel(1, 1, kernel2)
        hawkes.simulate()
        temp_realization = hawkes.timestamps
        print('i = ' + str(i) + ', ' + 'event counts = (' +
              str(len(temp_realization[0])) + ',' +
              str(len(temp_realization[1])) + ')')
        print(temp_realization)
        realizations.append(temp_realization)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    em = HawkesEM(4, kernel_size=16, n_threads=8, verbose=False, tol=1e-3)
    em.fit(events=realizations)

    fig = plot_hawkes_kernels(em, hawkes=hawkes, show=False)
    outputFILE = 'test-HawkesEM.png'
    for ax in fig.axes:
        ax.set_ylim([0, 1])
    plt.savefig(fname=outputFILE, bbox_inches='tight', pad_inches=0.2)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    print('\nexiting: test_HawkesEM()')
    print('\n##############################')
    return None
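# Hedged alternative (not part of the original test): tick ships
# SimuHawkesMulti for exactly this "many realizations of one model" pattern,
# avoiding the manual seeding loop; simulate_realizations is a hypothetical
# helper and assumes SimuHawkesMulti is imported from tick, as in the other
# snippets in this collection.
def simulate_realizations(hawkes, n_realizations=1000):
    multi = SimuHawkesMulti(hawkes, n_simulations=n_realizations)
    multi.simulate()
    return multi.timestamps  # one list of per-node timestamp arrays per run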
                   baseline=[0.05, 0.05], seed=382, verbose=False)
hawkes.end_time = 50000
hawkes.simulate()

e = HawkesConditionalLaw(claw_method="log", delta_lag=0.1, min_lag=0.002,
                         max_lag=100, quad_method="log", n_quad=50,
                         min_support=0.002, max_support=support, n_threads=-1)
e.incremental_fit(hawkes.timestamps)
e.compute()

fig = plot_hawkes_kernels(e, log_scale=True, hawkes=hawkes, show=False,
                          min_support=0.002, support=100)
for ax in fig.axes:
    ax.legend(loc=3)
    ax.set_ylim([1e-7, 1e2])

plt.show()
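# Hedged note (not part of the original example): incremental_fit can be
# called once per realization before a single compute(), which is how
# HawkesConditionalLaw aggregates statistics over several independent runs.
# A sketch, assuming `realizations` is a list of timestamp lists:
#
#     for ticks in realizations:
#         e.incremental_fit(ticks)
#     e.compute()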
from tick.plot import plot_hawkes_kernels
from tick.hawkes import SimuHawkesExpKernels, SimuHawkesMulti, HawkesExpKern
import matplotlib.pyplot as plt

end_time = 1000
n_realizations = 10

decays = [[4., 1.], [2., 2.]]
baseline = [0.12, 0.07]
adjacency = [[.3, 0.], [.6, .21]]

hawkes_exp_kernels = SimuHawkesExpKernels(adjacency=adjacency, decays=decays,
                                          baseline=baseline,
                                          end_time=end_time, verbose=False,
                                          seed=1039)

multi = SimuHawkesMulti(hawkes_exp_kernels, n_simulations=n_realizations)
multi.end_time = [(i + 1) / 10 * end_time for i in range(n_realizations)]
multi.simulate()

learner = HawkesExpKern(decays, penalty='l1', C=10)
learner.fit(multi.timestamps)

plot_hawkes_kernels(learner, hawkes=hawkes_exp_kernels)
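# Small follow-up sketch (not in the original example): inspect how well the
# l1-penalised fit recovered the ground-truth excitation structure; both
# attributes are used the same way in the training helpers above.
print("true adjacency:     ", adjacency)
print("estimated adjacency:", learner.adjacency)
print("estimated baseline: ", learner.baseline)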