def fit_exp_hawkes_and_simulate(train_times, decay, end_time):
    learner = HawkesExpKern(decay, verbose=True, max_iter=100000, tol=1e-10)
    learner.fit(train_times)
    score = learner.score()
    print(f'obtained {score}\n with {decay}\n')

    decay_matrix = np.full((1, 1), decay)
    simulation = SimuHawkesExpKernels(learner.adjacency,
                                      decay_matrix,
                                      baseline=learner.baseline,
                                      end_time=end_time)
    simulation.simulate()
    return learner, simulation
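# Hedged usage sketch for fit_exp_hawkes_and_simulate: the toy event times,
# the decay value and the end time below are illustrative assumptions, and the
# tick imports (HawkesExpKern, SimuHawkesExpKernels) are assumed to come from
# the surrounding module.
import numpy as np

train_times = [np.cumsum(np.random.exponential(1.0, size=200))]  # one node's sorted event times
learner, simulation = fit_exp_hawkes_and_simulate(train_times, decay=0.5, end_time=500.0)
print(simulation.timestamps)  # events simulated from the fitted exponential kernel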
def get_model(args, n_types):
    if args.model == "ERPP":
        model = ExplainableRecurrentPointProcess(n_types=n_types, **vars(args))
    elif args.model == "RPPN":
        model = RecurrentPointProcessNet(n_types=n_types, **vars(args))
    elif args.model == "HExp":
        from tick.hawkes import HawkesExpKern
        model = HawkesExpKern(args.decay, C=args.penalty, verbose=args.verbose)
    elif args.model == "HSG":
        from tick.hawkes import HawkesSumGaussians
        model = HawkesSumGaussians(
            args.max_mean,
            n_gaussians=args.n_gaussians,
            C=args.penalty,
            n_threads=args.n_threads,
            verbose=args.verbose,
        )
    elif args.model == "NPHC":
        from tick.hawkes import HawkesCumulantMatching
        model = HawkesCumulantMatching(
            integration_support=args.integration_support,
            C=args.penalty,
            verbose=args.verbose,
        )
    else:
        raise ValueError(f"Unsupported model={args.model}")
    return model
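# Hedged usage sketch for get_model: the attribute names (model, decay,
# penalty, verbose) mirror what the function reads from `args`, but the
# concrete values and the use of argparse.Namespace are assumptions.
from argparse import Namespace

args = Namespace(model="HExp", decay=1.0, penalty=1000.0, verbose=False)
hexp_learner = get_model(args, n_types=2)  # returns a tick HawkesExpKern learner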
def try_and_choose_decay(train_timestamps, decay_candidates):
    best_score = -1e100
    for i, decay in enumerate(decay_candidates):
        learner = HawkesExpKern(decay, verbose=False, max_iter=10000, tol=1e-10)
        learner.fit(train_timestamps)
        learner_score = learner.score()
        if learner_score > best_score:
            print(f'obtained {learner_score}\n with {decay}\n')
            best_hawkes = learner
            best_score = learner_score
            selected_decay = decay
    print(f'Best score: {best_score}\n Selected decay: {selected_decay}\n')
    return selected_decay
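# Sketch of how try_and_choose_decay might be driven with a log-spaced decay
# grid; the grid bounds and the synthetic single-node timestamps are
# assumptions, not part of the original code.
import numpy as np

decay_candidates = np.logspace(-3, 3, 13)
train_timestamps = [np.sort(np.random.uniform(0.0, 100.0, size=300))]
selected_decay = try_and_choose_decay(train_timestamps, decay_candidates)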
def learn(self, timestamps):
    gofit = 'least-squares'
    penalty = 'l2'
    C = 1e3
    solver = 'bfgs'
    step = None
    tol = 1e-05
    max_iter = 100
    verbose = False
    print_every = 10
    random_state = None
    elastic_net_ratio = 0.95
    # NOTE: `decays` was not defined in the original snippet; a placeholder
    # scalar decay is assumed here so the learner can be constructed.
    decays = 1.0

    a_kernel = HawkesExpKern(
        decays,
        gofit=gofit,
        penalty=penalty,
        C=C,
        solver=solver,
        step=step,
        tol=tol,
        max_iter=max_iter,
        verbose=verbose,
        print_every=print_every,
        # elastic_net_ratio=elastic_net_ratio,
        random_state=random_state)

    timestamps = np.array(timestamps)
    # print(timestamps)
    timestamps_list = [timestamps]
    a_kernel.fit(timestamps_list)

    print("No of users: ", a_kernel.n_nodes)
    print("Estimated mu: ", a_kernel.baseline)
    print("Estimated alpha:", a_kernel.adjacency)
    print("Estimated coeffs: ", a_kernel.coeffs)

    # score() already returns the log-likelihood, so the negative
    # log-likelihood is its negation (not -log of the score).
    likelihood = a_kernel.score(timestamps_list)
    print('Log likelihood: ', likelihood)
    print('Negative log likelihood: ', -likelihood)
def HawkesLHPick(self, Ts_Candidate, Ts_NewObs, BaselineStartTime,
                 SimStartTime, SimEndTime, paraTuples):
    Ts_NewObs = np.array(Ts_NewObs)
    # Calculate the baseline function from SimStartTime to SimEndTime
    LikelihoodDiff = []

    # Simulated time series
    for Ts_Observed, paras in zip(Ts_Candidate, paraTuples):
        # Get parameters
        Baseline = paras[0]
        Alpha = paras[1]
        Decay = paras[2]

        Ts_with_Observed = np.array(Ts_Observed)
        Ts_upto_NewObs = np.hstack((Ts_with_Observed, Ts_NewObs))

        # Re-position relative to the baseline start time
        Ts_with_Observed = Ts_with_Observed - BaselineStartTime
        Ts_upto_NewObs = Ts_upto_NewObs - BaselineStartTime

        # Likelihood before the new observations
        EndTimeBefore = SimEndTime - BaselineStartTime
        learner = HawkesExpKern(decays=Decay, penalty='l1', C=20, gofit='likelihood')
        try:
            fit_score_Before = learner.score(events=[Ts_with_Observed],
                                             end_times=EndTimeBefore,
                                             baseline=np.array([Baseline]),
                                             adjacency=np.array([[Alpha]]))
        except Exception:
            pdb.set_trace()
            print(Ts_with_Observed, EndTimeBefore)
            fit_score_Before = learner.score(events=[Ts_with_Observed],
                                             end_times=EndTimeBefore,
                                             baseline=np.array([Baseline]),
                                             adjacency=np.array([[Alpha]]))

        # Likelihood after the new observations
        EndTimeAfter = SimEndTime + self.PeriodPerPull - BaselineStartTime
        learner = HawkesExpKern(decays=Decay, penalty='l1', C=20, gofit='likelihood')
        try:
            fit_score_After = learner.score(events=[Ts_upto_NewObs],
                                            end_times=EndTimeAfter,
                                            baseline=np.array([Baseline]),
                                            adjacency=np.array([[Alpha]]))
        except Exception:
            pdb.set_trace()
            print(Ts_with_Observed, EndTimeBefore)
            fit_score_After = learner.score(events=[Ts_upto_NewObs],
                                            end_times=EndTimeAfter,
                                            baseline=np.array([Baseline]),
                                            adjacency=np.array([[Alpha]]))

        # Likelihood difference from the simulated data
        deltaLikeHood = fit_score_After - fit_score_Before
        LikelihoodDiff.append(deltaLikeHood)

    Idx = np.argmax(np.array(LikelihoodDiff))
    TimestampBest = Ts_Candidate[Idx]
    # BestPara is not useful here
    return TimestampBest, Idx
def HawkesExp(self, t_in, StartTime, EndTime):
    ts_in = t_in - StartTime
    EndTime_temp = EndTime - StartTime

    # Candidate decays on a log grid
    decays_list = [[[10.0**ep]] for ep in np.arange(-8, 2, 1)]
    baseline_list = []
    adjacency_list = []
    LikeScore_list = []
    for decays in decays_list:
        learner = HawkesExpKern(decays, penalty='l2', C=1, gofit='least-squares')
        learner.fit([ts_in])
        baseline_list.append(learner.baseline)
        adjacency_list.append(learner.adjacency)
        LikeScore_list.append(learner.score())
    # pdb.set_trace()

    # Keep the parameters of the best-scoring decay
    IdSelect = np.argsort(np.array(LikeScore_list))[::-1][0]
    baseline = baseline_list[IdSelect][0].tolist()
    adjacency = adjacency_list[IdSelect][0][0].tolist()
    decays = decays_list[IdSelect][0][0].tolist()
    LikeScore = LikeScore_list[IdSelect]

    # Intensity at EndTime under the selected exponential kernel
    Intensity = baseline + np.array(
        [decays * adjacency * math.exp(-decays * (EndTime_temp - t))
         for t in ts_in]).sum()
    return baseline, adjacency, decays, Intensity.tolist(), LikeScore
def get_hawkes_residuals(data, kernel_type, kernel_dim):
    """Gets residuals for a Hawkes process fit.

    @param data: (numpy array) n x 3 array w/ columns: time_elapsed, pos, diff (log difference)
    @param kernel_type: (str) kernel type determining intensity decay ('exp' or 'power law')
    @param kernel_dim: (int) dimension of hawkes process
    @returns residuals: (list of lists) len(residuals) = kernel_dim
    """
    valid_kernels = {'exp': 1, 'power law': 1}
    if kernel_type not in valid_kernels:
        raise ValueError("provide valid kernel type")

    neg_times = data[np.where(data[:, 1] == 0), 0][0]
    pos_times = data[np.where(data[:, 1] == 1), 0][0]
    timestamps = [data[:, 0]] if kernel_dim == 1 else [neg_times, pos_times]

    if kernel_type == 'exp':
        decays = np.ones((kernel_dim, kernel_dim)) * 3.
        learner = HawkesExpKern(decays)
    else:
        decays = np.ones((1, 15)) * 3.  # sum of 15 exp() variables
        learner = HawkesSumExpKern(decays, penalty='elasticnet',
                                   elastic_net_ratio=0.8)
    learner.fit(timestamps)

    # get intensity over time
    intensity_track_step = data[-1, 0] / (data.shape[0] * 100)
    tracked_intensity, intensity_times = learner.estimated_intensity(
        timestamps, intensity_track_step)

    # want to get integral of intensity between each event
    residuals = []  # len of residuals is dimension
    for i in range(kernel_dim):
        time_pairs = [(timestamps[i][n - 1], timestamps[i][n])
                      for n in range(1, len(timestamps[i]))]
        local_residuals = []
        # this loop is slow, should replace it
        for t1, t2 in time_pairs:
            local_intensities_indices = np.where((intensity_times >= t1)
                                                 & (intensity_times <= t2))
            local_intensities = np.take(tracked_intensity[i],
                                        local_intensities_indices)
            local_residuals.append((t2 - t1) * np.mean(local_intensities))
        residuals.append(local_residuals)
    return residuals
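# Hedged usage sketch for get_hawkes_residuals: the synthetic 3-column array
# (time_elapsed, pos, diff) below is made up purely to illustrate the expected
# input shape, and is not derived from the original code or data.
import numpy as np

n = 500
times = np.sort(np.random.uniform(0.0, 100.0, size=n))
pos = np.random.randint(0, 2, size=n)        # 0 = negative move, 1 = positive move
diff = np.random.normal(0.0, 1e-3, size=n)   # log differences
data = np.column_stack([times, pos, diff])

residuals = get_hawkes_residuals(data, kernel_type='exp', kernel_dim=2)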
"split_id", "model", "metric", "value", "config", ]) for split_id, res in enumerate(results): for metric_name, val in res.items(): df.loc[len(df)] = ( time, dataset, split_id, "Groundtruth", metric_name, val, vars(args), ) export_csv(df, "data/output/results.csv", append=True) if args.fit: with Timer("Fitting a hawkes process"): learner = HawkesExpKern( decays=np.full((args.n_types, args.n_types), args.exp_decay)) learner.fit(timestamps) print(learner.baseline) print(learner.adjacency)
def TrainInnerTimestampsExp2(clusters, num_decays=2000, decay_low=-10,
                             decay_high=10, e=10):
    best_score = -1e100
    print(f"Training on {len(clusters)} clusters")

    unique_decays = int(num_decays**(1.0 / 4))
    num_decays = unique_decays**4
    decay_candidates = np.logspace(decay_low, decay_high, unique_decays,
                                   dtype=np.dtype('d'))
    print(f"Decay Range: {decay_candidates[0]} -> {decay_candidates[-1]}")
    print(f"{unique_decays} unique decays. {num_decays} total")

    best_decay = None
    score_list = np.zeros(num_decays)  # x*e^(-xt)
    l = 0
    floaterrors = 0
    baseline_errors = 0
    for i in range(num_decays):
        s = f"Decay {i} ({format(100/num_decays*i, '.2f')}% done)"
        l = len(s)
        # print(f"{' '*l}\r", end="", flush=True)
        print(f"{' '*l}\r{s}\r", end='', flush=True)

        # Build the 2x2 decay matrix for this combination of candidates
        decay = np.ones((2, 2))
        decay[0][0] = decay_candidates[int(i / (unique_decays**3)) % unique_decays]
        decay[0][1] = decay_candidates[int(i / (unique_decays**2)) % unique_decays]
        decay[1][0] = decay_candidates[int(i / (unique_decays**1)) % unique_decays]
        decay[1][1] = decay_candidates[int(i) % unique_decays]
        prev_score = float('-inf')
        # print(decay)
        try:
            learner = HawkesExpKern(decay, penalty='l2', C=e, max_iter=1000,
                                    solver='agd', tol=1e-5)
            learner.fit(clusters)
            hawkes_score = learner.score()

            # ensure there is a non-0 baseline
            numb = 0
            for b in learner.baseline:
                if b > 0:
                    numb += 1
            if numb == 0:
                baseline_errors += 1
                continue

            # record the score for plotting
            score_list[i] = hawkes_score
            # record the best
            if hawkes_score > best_score:
                best_score = hawkes_score
                best_learner = learner
                best_decay = decay
        except ZeroDivisionError:
            # print("float error")
            floaterrors += 1
            continue

    # create a score plot
    plt.plot(score_list)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('decay Scores')
    plt.grid(True)
    plt.show()

    print(f"\nTraining Done")
    print(f"Float Errors: {floaterrors} ({100/num_decays*floaterrors}%)")
    print(f"Baseline Errors: {baseline_errors} ({100/num_decays*baseline_errors}%)")
    print(f"==========\nSuccessful Results: "
          f"{num_decays - floaterrors - baseline_errors} "
          f"({100/num_decays*(num_decays - floaterrors - baseline_errors)}%)\n==========\n")
    print(f"\nBest Score: {best_score}")
    print(f"Best Decay: {best_decay}")

    plot_hawkes_kernels(best_learner)
    print(f"Adjacency: {best_learner.adjacency}")
    print(f"Baseline: {best_learner.baseline}")
    print(f"Coeffs: {best_learner.coeffs}")

    # activate this for residuals (Warning, it is REALLLLLLLLLLY SLOOOOOOOOOOOOW)
    cat_clusters = ConcatClusters(clusters, 0)
    step = 0.1
    residuals = goodness_of_fit_par(best_learner, cat_clusters, step,
                                    integrate.simps)
    plot_resid(residuals, 2, 1)

    return best_learner.adjacency, best_learner.baseline, best_decay
def TrainInnerClusterExp(clusters, num_decays=2000, decay_low=-10,
                         decay_high=10):
    data = ConcatClusters(clusters, 0)
    best_score = -1e100

    # decays for multiple dimension process
    # update this to have different decays for each process
    # num_decays = 2000
    # print(f"Total decay combinations = {num_decays*num_decays*num_decays*num_decays}")
    decay_candidates = np.logspace(decay_low, decay_high, num_decays,
                                   dtype=np.dtype('d'))
    print(f"Training on {len(clusters)} clusters")
    print(f"Decay Range: {decay_candidates[0]} -> {decay_candidates[-1]}")

    best_decay = decay_candidates[0]
    score_list = np.zeros(num_decays)  # x*e^(-xt)
    l = 0
    floaterrors = 0
    baseline_errors = 0
    for i, decay in enumerate(decay_candidates):
        decay = decay * np.ones((2, 2))
        try:
            # might need a hyperbolic kernel?
            # it seems to get too excited and decays too slowly
            # only small decay values seem to make sense
            learner = HawkesExpKern(decay, penalty='l2', C=1000, max_iter=1000,
                                    solver='agd', tol=1e-3)  # , max_iter=1000, tol=1e-5)  # gofit='likelihood'
            ### Error (penalty) functions
            # l1 - has 0 step errors
            # l2 - runs, but the results do not look good, heavily favours higher
            #      decay values that produce nonsense graphs
            # elasticnet (elastic_net_ratio, def 0.95) - values closer to 0 work better
            #      (since it uses l2) otherwise it produces step errors. Still similar to l2.
            # nuclear - basically the same
            # none - how can you have no penalty function?
            ### solvers
            # agd - all penalties favour super high decays, basically wants random event generation
            # gd - basically the same
            # bfgs - does weird things, but is quick
            # svrg
            learner.fit(data, start=learner.coeffs)
            """cluster_num = 0
            for cluster in clusters:
                if (cluster_num % 100 == 0):
                    # print out training progress
                    s = f"It: {i}, Decay: {decay[0]}, Cluster: {cluster_num}"
                    print(f"\r{' '*l}\r", end='')
                    print(f"It: {i}, Decay: {decay[0]}, Cluster: {cluster_num}", end='', flush=True)
                    l = len(s)
                learner.fit(cluster, start=learner.coeffs)
                cluster_num += 1"""
            hawkes_score = learner.score()
            # print(hawkes_score)
            # print(f"Coeffs: {learner.coeffs}")

            # ensure there is a non-0 baseline
            numb = 0
            for b in learner.baseline:
                if b > 0:
                    numb += 1
            if numb == 0:
                baseline_errors += 1
                continue

            # record the score for plotting
            score_list[i] = hawkes_score
            # record the best
            if hawkes_score > best_score:
                best_score = hawkes_score
                best_learner = learner
                best_decay = decay
                step = 0.01
                # residuals = goodness_of_fit_par(learner, data, step, integrate.simps)
                # plot_resid(residuals, 2, 1)
        except ZeroDivisionError:
            # print("float error")
            floaterrors += 1
            continue

    # create a score plot
    plt.plot(decay_candidates, score_list)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('decay Scores')
    plt.grid(True)
    plt.show()

    print(f"\nTraining Done")
    print(f"Float Errors: {floaterrors} ({100/num_decays*floaterrors}%)")
    print(f"Baseline Errors: {baseline_errors} ({100/num_decays*baseline_errors}%)")
    print(f"==========\nSuccessful Results: "
          f"{num_decays - floaterrors - baseline_errors} "
          f"({100/num_decays*(num_decays - floaterrors - baseline_errors)}%)\n==========\n")
    print(f"\nBest Score: {best_score}")
    print(f"Best Decay: {best_decay}")

    plot_hawkes_kernels(best_learner)
    print(f"Adjacency: {best_learner.adjacency}")
    print(f"Baseline: {best_learner.baseline}")
    print(f"Coeffs: {best_learner.coeffs}")

    # return best_learner.adjacency, best_learner.baseline, best_decay
    return best_learner, best_decay
def single_exp(decays, events):
    return -HawkesExpKern(decays=decays[0],
                          penalty='elasticnet',
                          tol=1e-8,
                          elastic_net_ratio=0.9,
                          max_iter=1000).fit(events).score()
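# single_exp returns the negative of the learner's log-likelihood score, so it
# can be handed to a scalar optimiser to pick the decay. A hedged sketch: the
# bounds, the toy events and the use of scipy's bounded minimiser are
# assumptions, not part of the original code.
import numpy as np
from scipy.optimize import minimize_scalar

events = [np.sort(np.random.uniform(0.0, 100.0, size=500))]  # toy single-node data
res = minimize_scalar(lambda d: single_exp([d], events),
                      bounds=(1e-3, 1e3), method='bounded')
best_decay = res.x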
from tick.plot import plot_hawkes_kernels
from tick.hawkes import SimuHawkesExpKernels, SimuHawkesMulti, HawkesExpKern
import matplotlib.pyplot as plt

end_time = 1000
n_realizations = 10

decays = [[4., 1.], [2., 2.]]
baseline = [0.12, 0.07]
adjacency = [[.3, 0.], [.6, .21]]

hawkes_exp_kernels = SimuHawkesExpKernels(adjacency=adjacency,
                                          decays=decays,
                                          baseline=baseline,
                                          end_time=end_time,
                                          verbose=False,
                                          seed=1039)

multi = SimuHawkesMulti(hawkes_exp_kernels, n_simulations=n_realizations)
multi.end_time = [(i + 1) / 10 * end_time for i in range(n_realizations)]
multi.simulate()

learner = HawkesExpKern(decays, penalty='l1', C=10)
learner.fit(multi.timestamps)

plot_hawkes_kernels(learner, hawkes=hawkes_exp_kernels)
def model_hawkes(df=None, learnertype=None, decay=None, Dimensions=None,
                 flavor=None, def_low=None, def_high=None):
    path = os.path.join(os.getcwd())
    n_nodes = len(list(Dimensions.keys()))
    df = df.dropna(subset=['computed_final_score'])

    if learnertype == 'HawkesExpKern':
        try:
            p = os.path.join(path, 'results', 'tables', learnertype, flavor,
                             str(n_nodes) + '_nodes')
            p1 = os.path.join(path, 'results', 'figs', learnertype, flavor,
                              str(n_nodes) + '_nodes')
            print(p)
            os.makedirs(p)
            os.makedirs(p1)
        except FileExistsError:
            print('folders exist')
        else:
            print('created.')

        for d in decay:
            A = []
            B = []
            learner = HawkesExpKern(decays=d)
            for i in range(len(df)):
                s1 = df.iloc[i, :len(Dimensions.keys())].tolist()
                learner.fit(s1)
                A.append(learner.adjacency)
                B.append(learner.baseline)

            A_super = pd.DataFrame([list(x) for x in A])
            A_super.columns = list(Dimensions.keys())
            A_super['user_id'] = df['user_id'].tolist()
            A_super['computed_final_score'] = df['computed_final_score'].tolist()

            B_super = pd.DataFrame(list(x) for x in B)
            B_super.columns = list(Dimensions.keys())
            B_super['user_id'] = df['user_id'].tolist()
            B_super['computed_final_score'] = df['computed_final_score'].tolist()

            A_super.to_csv(p + '/A_' + str(d) + '.csv', index=False)
            B_super.to_csv(p + '/B_' + str(d) + '.csv', index=False)

            score = A_super['computed_final_score'].tolist()
            plot_hawkes(n_nodes, A_super, B_super, learnertype, d, score,
                        Dimensions, p1, def_low, def_high)

    elif learnertype == 'HawkesADM4':
        try:
            p = os.path.join(path, 'results', 'tables', learnertype, flavor,
                             str(n_nodes) + '_nodes')
            p1 = os.path.join(path, 'results', 'figs', learnertype, flavor,
                              str(n_nodes) + '_nodes')
            os.makedirs(p)
            os.makedirs(p1)
        except FileExistsError:
            pass
        else:
            print('folders created')

        for d in decay:
            A = []
            B = []
            learner = HawkesADM4(decay=d)
            for i in range(len(df)):
                s1 = df.iloc[i, :len(Dimensions.keys())].tolist()
                learner.fit(s1)
                A.append(learner.adjacency)
                B.append(learner.baseline)

            A_super = pd.DataFrame([list(x) for x in A])
            A_super.columns = list(Dimensions.keys())
            A_super['user_id'] = df['user_id'].tolist()
            A_super['computed_final_score'] = df['computed_final_score'].tolist()

            B_super = pd.DataFrame(list(x) for x in B)
            B_super.columns = list(Dimensions.keys())
            B_super['user_id'] = df['user_id'].tolist()
            B_super['computed_final_score'] = df['computed_final_score'].tolist()

            A_super.to_csv(p + '/A_' + str(d) + '.csv', index=False)
            B_super.to_csv(p + '/B_' + str(d) + '.csv', index=False)

            score = A_super['computed_final_score'].tolist()
            plot_hawkes(n_nodes, A_super, B_super, learnertype, d, score,
                        Dimensions, p1, def_low, def_high)
    else:
        print('function not implemented.')
                        p=pHat, Tmax=Tmax)
PermPred = np.argmax(piEstimPErm, axis=1)
ErrorPErm = np.mean(PermPred != Ytest)

#################### PG #####################################################
#############################################################################
paramLS = [None] * Kclass
for k in range(Kclass):
    classk = (np.where(Ytrain == k))
    classk = np.array(classk[0])
    listJumptimesK = [None] * len(classk)
    for i in range(len(classk)):
        listJumptimesK[i] = [listJumptimesTrain[classk[i]]]  # tick format
    learnerLSK = HawkesExpKern(decays=0.5, gofit='least-squares')
    learnerLSK.fit(listJumptimesK)
    paramLS[k] = [
        float(learnerLSK.baseline[0]),
        float(learnerLSK.adjacency[0]),
        learnerLSK.decays
    ]

piEstimPlugIn = phiFtestim_expo(Jumptimes=listJumptimesTest, Kclass=Kclass,
                                param=paramLS, p=pHat, Tmax=Tmax)
plugInPred = np.argmax(piEstimPlugIn, axis=1)
ErrorPG = np.mean(plugInPred != Ytest)
#################################################################################