Example #1
import numpy as np
from tick.hawkes import HawkesExpKern, SimuHawkesExpKernels


def fit_exp_hawkes_and_simulate(train_times, decay, end_time):
    # fit an exponential-kernel Hawkes learner, report its goodness-of-fit
    # score, then simulate fresh events from the estimated parameters
    learner = HawkesExpKern(decay, verbose=True, max_iter=100000, tol=1e-10)
    learner.fit(train_times)
    score = learner.score()
    print(f'obtained {score}\n with {decay}\n')

    decay_matrix = np.full((1, 1), decay)

    simulation = SimuHawkesExpKernels(learner.adjacency,
                                      decay_matrix,
                                      baseline=learner.baseline,
                                      end_time=end_time)
    simulation.simulate()
    return learner, simulation
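# A minimal usage sketch (assumptions: `train_times` is a list with one 1-D
# numpy array of event times, as tick expects for a single-node process; the
# toy data below is made up for illustration).
train_times = [np.cumsum(np.random.exponential(1.0, size=200))]
learner, simulation = fit_exp_hawkes_and_simulate(train_times, decay=1.5,
                                                  end_time=500)
print(len(simulation.timestamps[0]), 'simulated events')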
Example #2
def get_model(args, n_types):
    if args.model == "ERPP":
        model = ExplainableRecurrentPointProcess(n_types=n_types, **vars(args))
    elif args.model == "RPPN":
        model = RecurrentPointProcessNet(n_types=n_types, **vars(args))
    elif args.model == "HExp":
        from tick.hawkes import HawkesExpKern

        model = HawkesExpKern(args.decay, C=args.penalty, verbose=args.verbose)
    elif args.model == "HSG":
        from tick.hawkes import HawkesSumGaussians

        model = HawkesSumGaussians(
            args.max_mean,
            n_gaussians=args.n_gaussians,
            C=args.penalty,
            n_threads=args.n_threads,
            verbose=args.verbose,
        )
    elif args.model == "NPHC":
        from tick.hawkes import HawkesCumulantMatching

        model = HawkesCumulantMatching(
            integration_support=args.integration_support,
            C=args.penalty,
            verbose=args.verbose,
        )
    else:
        raise ValueError(f"Unsupported model={args.model}")

    return model
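# A hypothetical invocation of the "HExp" branch; the Namespace fields below
# mirror only the attributes that branch reads (the other branches expect
# additional, project-specific arguments).
from argparse import Namespace

args = Namespace(model="HExp", decay=1.0, penalty=1000.0, verbose=False)
hexp_model = get_model(args, n_types=2)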
Example #3
from tick.hawkes import HawkesExpKern


def try_and_choose_decay(train_timestamps, decay_candidates):
    # grid-search over decay candidates; keep the one whose fitted learner
    # scores best on the training timestamps
    best_score = -1e100
    for i, decay in enumerate(decay_candidates):
        learner = HawkesExpKern(decay,
                                verbose=False,
                                max_iter=10000,
                                tol=1e-10)
        learner.fit(train_timestamps)
        learner_score = learner.score()
        if learner_score > best_score:
            print(f'obtained {learner_score}\n with {decay}\n')
            best_hawkes = learner
            best_score = learner_score
            selected_decay = decay
    print(f'Best score: {best_score}\n Selected decay: {selected_decay}\n')

    return selected_decay
Example #4
    def learn(self, timestamps, decays):
        # decays: exponential kernel decay(s) for HawkesExpKern,
        # assumed to be supplied by the caller
        gofit = 'least-squares'
        penalty = 'l2'
        C = 1e3
        solver = 'bfgs'
        step = None
        tol = 1e-05
        max_iter = 100
        verbose = False
        print_every = 10
        random_state = None
        elastic_net_ratio = 0.95

        a_kernel = HawkesExpKern(
            decays,
            gofit=gofit,
            penalty=penalty,
            C=C,
            solver=solver,
            step=step,
            tol=tol,
            max_iter=max_iter,
            verbose=verbose,
            print_every=print_every,
            # elastic_net_ratio=elastic_net_ratio,
            random_state=random_state)

        # tick expects a list of per-node event-time arrays; wrap the series
        timestamps_list = [np.array(timestamps)]

        a_kernel.fit(timestamps_list)

        print("No of users: ", a_kernel.n_nodes)
        print("Estimated mu: ", a_kernel.baseline)
        print("Estimated alpha:", a_kernel.adjacency)
        print("Estimated coeffs: ", a_kernel.coeffs)

        likelihood = a_kernel.score(timestamps_list)

        print('Likelihood: ', likelihood)

        print('Negative Log likelihood: ', -np.log(likelihood))
Example #5
	def HawkesLHPick(self, Ts_Candidate, Ts_NewObs, BaselineStartTime, SimStartTime, SimEndTime, paraTuples):
		
		Ts_NewObs = np.array( Ts_NewObs );
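		# For each candidate series, measure how much the likelihood score of an
		# exponential Hawkes model changes once the newly observed events are
		# appended, then keep the candidate with the largest increase.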

		# Calculate the Baseline Function From SimStartTime to SimEndTime
		LikelihoodDiff = []
		# Simulated Time Series 
		for Ts_Observed, paras in zip( Ts_Candidate, paraTuples):
			
			# Get parameter
			Baseline = paras[0];
			Alpha = paras[1];
			Decay = paras[2];
			
			Ts_with_Observed = np.array( Ts_Observed );
			Ts_upto_NewObs = np.hstack( (Ts_with_Observed, Ts_NewObs) )
			# 
			# re position 
			Ts_with_Observed = Ts_with_Observed - BaselineStartTime
			Ts_upto_NewObs = Ts_upto_NewObs - BaselineStartTime
			
			# Likelihood Before
			EndTimeBefore = SimEndTime - BaselineStartTime;
			learner = HawkesExpKern(decays=Decay, penalty='l1', C=20, gofit='likelihood')
			try:
				fit_score_Before = learner.score(events=[Ts_with_Observed], end_times=EndTimeBefore, baseline=np.array([Baseline]),\
							 adjacency=np.array([[Alpha]]) )
			except:
				pdb.set_trace()
				print( Ts_with_Observed, EndTimeBefore)
				fit_score_Before = learner.score(events=[Ts_with_Observed], end_times=EndTimeBefore, baseline=np.array([Baseline]),\
                                                         adjacency=np.array([[Alpha]]) )
			# Likelihood After
			EndTimeAfter= SimEndTime + self.PeriodPerPull - BaselineStartTime;
			learner = HawkesExpKern(decays=Decay, penalty='l1', C=20, gofit='likelihood')
			try:
				fit_score_After = learner.score(events=[Ts_upto_NewObs], end_times=EndTimeAfter, baseline=np.array([Baseline]),\
							adjacency=np.array([[Alpha]]) )
			except:
				pdb.set_trace()
				print( Ts_with_Observed, EndTimeBefore)
				fit_score_After = learner.score(events=[Ts_upto_NewObs], end_times=EndTimeAfter, baseline=np.array([Baseline]),\
                                                        adjacency=np.array([[Alpha]]) )
			# Likelihood From Simulated Data
			deltaLikeHood = fit_score_After - fit_score_Before;
			LikelihoodDiff.append( deltaLikeHood )
		Idx = np.argmax(np.array( LikelihoodDiff) )	
		TimestampBest = Ts_Candidate[Idx]
		# Where BestPara is not useful
		return TimestampBest, Idx
Example #6
	def HawkesExp(self, t_in, StartTime, EndTime):
		ts_in = t_in  - StartTime;
		EndTime_temp =  EndTime - StartTime;
		#
		decays_list = [ [[10.0**ep]] for ep in np.arange(-8, 2,1)]
		baseline_list = [];
		adjacency_list = [];
		LikeScore_list = [];
		for decays in decays_list:
			learner = HawkesExpKern(decays, penalty='l2', C=1, gofit='least-squares')
			learner.fit( [ts_in] )
			baseline_list.append( learner.baseline )
			adjacency_list.append( learner.adjacency )
			LikeScore_list.append( learner.score() )
		#pdb.set_trace()
		#
		IdSelect = np.argsort( np.array(LikeScore_list) )[::-1][0];
		baseline = baseline_list[IdSelect][0].tolist()
		adjacency = adjacency_list[IdSelect][0][0].tolist()
		decays = decays_list[IdSelect][0][0].tolist()
		LikeScore = LikeScore_list[IdSelect]
		#
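		# conditional intensity at the end time:
		# lambda(T) = baseline + sum_i adjacency * decays * exp(-decays * (T - t_i))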
		Intensity = baseline + np.array( [  decays * adjacency * math.exp( -decays*(EndTime_temp-t)) for t in ts_in ] ).sum()
		return baseline, adjacency, decays, Intensity.tolist(), LikeScore
Example #7
def get_hawkes_residuals(data, kernel_type, kernel_dim):
    """" gets residuals for hawkes process fit
    @param data: (numpy array) n x 3 array w/ columns: time_elapsed, pos, diff (log difference)
    @param kernel_type: (str) kernel type determining intensity decay (exp, double_exp, power_law)
    @param kernel_dim: (int) dimension of hawkes process
    @returns residuals: (list of lists) len(residuals) = kernel_dim
    """
    valid_kernels = {'exp': 1, 'power law': 1}
    if kernel_type not in valid_kernels:
        raise ValueError("provide valid kernel type")

    # column 1 is a direction flag: split event times into "negative" (pos == 0)
    # and "positive" (pos == 1) streams for the two-dimensional case
    neg_times = data[np.where(data[:, 1] == 0), 0][0]
    pos_times = data[np.where(data[:, 1] == 1), 0][0]
    timestamps = [data[:, 0]] if kernel_dim == 1 else [neg_times, pos_times]

    if kernel_type == 'exp':
        decays = np.ones((kernel_dim, kernel_dim)) * 3.
        learner = HawkesExpKern(decays)
    else:
        decays = np.ones((1, 15)) * 3.  # sum of 15 exp() variables
        learner = HawkesSumExpKern(decays,
                                   penalty='elasticnet',
                                   elastic_net_ratio=0.8)

    learner.fit(timestamps)

    # get intensity over time
    intensity_track_step = data[-1, 0] / (data.shape[0] * 100)
    tracked_intensity, intensity_times = learner.estimated_intensity(
        timestamps, intensity_track_step)
    # want to get integral of intensity between each event
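    # (time-rescaling: if the fitted model is adequate, these integrals should
    # behave roughly like i.i.d. unit-rate exponential samples)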
    residuals = []  # len of residuals is dimension
    for i in range(kernel_dim):
        time_pairs = [(timestamps[i][n - 1], timestamps[i][n])
                      for n in range(1, len(timestamps[i]))]
        local_residuals = []
        # this loop is slow, should replace it
        for t1, t2 in time_pairs:
            local_intensities_indices = np.where((intensity_times >= t1)
                                                 & (intensity_times <= t2))
            local_intensities = np.take(tracked_intensity[i],
                                        local_intensities_indices)
            local_residuals.append((t2 - t1) * np.mean(local_intensities))
        residuals.append(local_residuals)

    return residuals
    "split_id",
    "model",
    "metric",
    "value",
    "config",
])

for split_id, res in enumerate(results):
    for metric_name, val in res.items():

        df.loc[len(df)] = (
            time,
            dataset,
            split_id,
            "Groundtruth",
            metric_name,
            val,
            vars(args),
        )

export_csv(df, "data/output/results.csv", append=True)

if args.fit:
    with Timer("Fitting a hawkes process"):
        learner = HawkesExpKern(
            decays=np.full((args.n_types, args.n_types), args.exp_decay))
        learner.fit(timestamps)

    print(learner.baseline)
    print(learner.adjacency)
Example #9
def TrainInnerTimestampsExp2(clusters,
                             num_decays=2000,
                             decay_low=-10,
                             decay_high=10,
                             e=10):
    best_score = -1e100
    print(f"Training on {len(clusters)} clusters")
    unique_decays = int(num_decays**(1.0 / 4))
    num_decays = unique_decays**4
    decay_candidates = np.logspace(decay_low,
                                   decay_high,
                                   unique_decays,
                                   dtype=np.dtype('d'))
    print(f"Decay Range: {decay_candidates[0]} -> {decay_candidates[-1]}")
    print(f"{unique_decays} unique decays. {num_decays} total")
    best_decay = None
    score_list = np.zeros(num_decays)

    #x*e^(-xt)
    l = 0
    floaterrors = 0
    baseline_errors = 0
    for i in range(num_decays):
        s = f"Decay {i} ({format(100/num_decays*i, '.2f')}% done)"
        l = len(s)
        #print(f"{' '*l}\r", end="", flush=True)
        print(f"{' '*l}\r{s}\r", end='', flush=True)
        decay = np.ones((2, 2))
        decay[0][0] = decay_candidates[int(i / (unique_decays**3)) %
                                       unique_decays]
        decay[0][1] = decay_candidates[int(i / (unique_decays**2)) %
                                       unique_decays]
        decay[1][0] = decay_candidates[int(i / (unique_decays**1)) %
                                       unique_decays]
        decay[1][1] = decay_candidates[int(i) % unique_decays]
        prev_score = float('-inf')
        #print(decay)
        try:
            learner = HawkesExpKern(decay,
                                    penalty='l2',
                                    C=e,
                                    max_iter=1000,
                                    solver='agd',
                                    tol=1e-5)
            learner.fit(clusters)
            hawkes_score = learner.score()

            #ensure there is a non-0 baseline
            numb = 0
            for b in learner.baseline:
                if (b > 0):
                    numb += 1
            if (numb == 0):
                baseline_errors += 1
                continue

            #record the score for plotting
            score_list[i] = hawkes_score

            #record the best
            if (hawkes_score > best_score):
                best_score = hawkes_score
                best_learner = learner
                best_decay = decay

        except ZeroDivisionError:
            #print("float error");
            floaterrors += 1
            continue

    #create a score plot
    plt.plot(score_list)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('decay Scores')
    plt.grid(True)
    plt.show()

    print(f"\nTraining Done")
    print(f"Float Errors: {floaterrors} ({100/num_decays*floaterrors}%)")
    print(
        f"Baseline Errors: {baseline_errors} ({100/num_decays*baseline_errors}%)"
    )
    print(
        f"==========\nSuccessful Results: {num_decays - floaterrors - baseline_errors} ({100/num_decays*(num_decays - floaterrors - baseline_errors)}%)\n==========\n"
    )

    print(f"\nBest Score: {best_score}")
    print(f"Best Decay: {best_decay}")
    plot_hawkes_kernels(best_learner)

    print(f"Adjacency: {best_learner.adjacency}")
    print(f"Baseline: {best_learner.baseline}")
    print(f"Coeffs: {best_learner.coeffs}")

    #activate this for residuals (Warning, it is REALLLLLLLLLLY SLOOOOOOOOOOOOW)
    cat_clusters = ConcatClusters(clusters, 0)
    step = 0.1
    residuals = goodness_of_fit_par(best_learner, cat_clusters, step,
                                    integrate.simps)
    plot_resid(residuals, 2, 1)

    return best_learner.adjacency, best_learner.baseline, best_decay
Example #10
def TrainInnerClusterExp(clusters,
                         num_decays=2000,
                         decay_low=-10,
                         decay_high=10):
    data = ConcatClusters(clusters, 0)
    best_score = -1e100
    #decays for multiple dimension process
    #update this to have different decays for each process
    #num_decays = 2000
    #print(f"Total decay combinations = {num_decays*num_decays*num_decays*num_decays}")
    decay_candidates = np.logspace(decay_low,
                                   decay_high,
                                   num_decays,
                                   dtype=np.dtype('d'))
    print(f"Training on {len(clusters)} clusters")
    print(f"Decay Range: {decay_candidates[0]} -> {decay_candidates[-1]}")
    best_decay = decay_candidates[0]
    score_list = np.zeros(num_decays)

    #x*e^(-xt)
    l = 0
    floaterrors = 0
    baseline_errors = 0
    for i, decay in enumerate(decay_candidates):
        decay = decay * np.ones((2, 2))
        try:
            #might need a hyperbolic kernel?
            #it seems to get too excited and decays too slowly
            #only small decay values seem to make sense
            learner = HawkesExpKern(
                decay,
                penalty='l2',
                C=1000,
                max_iter=1000,
                solver='agd',
                tol=1e-3)  #, max_iter=1000, tol=1e-5) #gofit='likelihood'
            ###Error functions
            #l1 - has 0 step errors
            #l2 - runs, but the results do not look good, heavily favours higher decay values that produce nonsense graphs
            #elasticnet (elastic_net_ratio, def 0.95) - values closer to 0 work better (since it uses l2) otherwise it produces step errors. Still similar to l2.
            #nuclear - basically the same
            #none - how can you have no penalty function?
            ###solvers
            #agd - all penalties favour super high decays, basically wants random event generation
            #gd - basically the same
            #bfgs - does weird things, but is quick
            #svrg

            learner.fit(data, start=learner.coeffs)
            """cluster_num = 0
            for cluster in clusters:
                if (cluster_num % 100 == 0):
                    #print out training progress
                    s = f"It: {i}, Decay: {decay[0]}, Cluster: {cluster_num}"
                    print(f"\r{' '*l}\r", end='')
                    print(f"It: {i}, Decay: {decay[0]}, Cluster: {cluster_num}", end='', flush=True)
                    l = len(s)
                learner.fit(cluster, start=learner.coeffs)
                cluster_num += 1"""
            hawkes_score = learner.score()
            #print(hawkes_score)
            #print(f"Coeffs: {learner.coeffs}")

            #ensure there is a non-0 baseline
            numb = 0
            for b in learner.baseline:
                if (b > 0):
                    numb += 1
            if (numb == 0):
                baseline_errors += 1
                continue

            #record the score for plotting
            score_list[i] = hawkes_score

            #record the best
            if (hawkes_score > best_score):
                best_score = hawkes_score
                best_learner = learner
                best_decay = decay

            step = 0.01
            #residuals = goodness_of_fit_par(learner,data,step,integrate.simps)
            #plot_resid(residuals,2,1)

        except ZeroDivisionError:
            #print("float error");
            floaterrors += 1
            continue

    #create a score plot
    plt.plot(decay_candidates, score_list)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('decay Scores')
    plt.grid(True)
    plt.show()

    print(f"\nTraining Done")
    print(f"Float Errors: {floaterrors} ({100/num_decays*floaterrors}%)")
    print(
        f"Baseline Errors: {baseline_errors} ({100/num_decays*baseline_errors}%)"
    )
    print(
        f"==========\nSuccessful Results: {num_decays - floaterrors - baseline_errors} ({100/num_decays*(num_decays - floaterrors - baseline_errors)}%)\n==========\n"
    )

    print(f"\nBest Score: {best_score}")
    print(f"Best Decay: {best_decay}")
    plot_hawkes_kernels(best_learner)

    print(f"Adjacency: {best_learner.adjacency}")
    print(f"Baseline: {best_learner.baseline}")
    print(f"Coeffs: {best_learner.coeffs}")

    #return best_learner.adjacency, best_learner.baseline, best_decay
    return best_learner, best_decay
Example #11
from tick.hawkes import HawkesExpKern


def single_exp(decays, events):
    # negative goodness-of-fit score: convenient as an objective for a minimizer
    return -HawkesExpKern(decays=decays[0],
                          penalty='elasticnet',
                          tol=1e-8,
                          elastic_net_ratio=0.9,
                          max_iter=1000).fit(events).score()
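# A minimal sketch of one way this objective could be used (assumptions: the
# decay is optimized over a bounded interval with scipy, and `events` is a list
# of realizations in tick's format; the simulated data is for illustration only).
from scipy.optimize import minimize_scalar
from tick.hawkes import SimuHawkesExpKernels

simu = SimuHawkesExpKernels(adjacency=[[0.5]], decays=[[2.0]],
                            baseline=[0.4], end_time=500, seed=42)
simu.simulate()
events = [simu.timestamps]

res = minimize_scalar(lambda d: single_exp([d], events),
                      bounds=(0.1, 10.0), method='bounded')
print('selected decay:', res.x)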
Example #12
from tick.plot import plot_hawkes_kernels
from tick.hawkes import SimuHawkesExpKernels, SimuHawkesMulti, HawkesExpKern
import matplotlib.pyplot as plt

end_time = 1000
n_realizations = 10

decays = [[4., 1.], [2., 2.]]
baseline = [0.12, 0.07]
adjacency = [[.3, 0.], [.6, .21]]

hawkes_exp_kernels = SimuHawkesExpKernels(adjacency=adjacency,
                                          decays=decays,
                                          baseline=baseline,
                                          end_time=end_time,
                                          verbose=False,
                                          seed=1039)

multi = SimuHawkesMulti(hawkes_exp_kernels, n_simulations=n_realizations)

# give each realization its own end time (10%, 20%, ..., 100% of end_time),
# so the learner is fit on samples of different lengths
multi.end_time = [(i + 1) / 10 * end_time for i in range(n_realizations)]
multi.simulate()

learner = HawkesExpKern(decays, penalty='l1', C=10)
learner.fit(multi.timestamps)

plot_hawkes_kernels(learner, hawkes=hawkes_exp_kernels)
Example #13
def model_hawkes(df=None,
                 learnertype=None,
                 decay=None,
                 Dimensions=None,
                 flavor=None,
                 def_low=None,
                 def_high=None):
    path = os.getcwd()
    n_nodes = len(list(Dimensions.keys()))
    df = df.dropna(subset=['computed_final_score'])
    if learnertype == 'HawkesExpKern':
        try:
            p = os.path.join(path, 'results', 'tables', learnertype, flavor,
                             str(n_nodes) + '_nodes')
            p1 = os.path.join(path, 'results', 'figs', learnertype, flavor,
                              str(n_nodes) + '_nodes')
            print(p)
            os.makedirs(p)
            os.makedirs(p1)
        except FileExistsError:
            print('folders exist')
        else:
            print('created.')

        for d in decay:
            A = []
            B = []
            learner = HawkesExpKern(decays=d)
            for i in range(len(df)):
                s1 = df.iloc[i, :len(Dimensions.keys())].tolist()
                learner.fit(s1)
                A.append(learner.adjacency)
                B.append(learner.baseline)
            A_super = pd.DataFrame([list(x) for x in A])
            A_super.columns = list(Dimensions.keys())
            A_super['user_id'] = df['user_id'].tolist()
            A_super['computed_final_score'] = df[
                'computed_final_score'].tolist()
            B_super = pd.DataFrame(list(x) for x in B)
            B_super.columns = list(Dimensions.keys())
            B_super['user_id'] = df['user_id'].tolist()
            B_super['computed_final_score'] = df[
                'computed_final_score'].tolist()
            A_super.to_csv(p + '/A_' + str(d) + '.csv', index=False)
            B_super.to_csv(p + '/B_' + str(d) + '.csv', index=False)
            score = A_super['computed_final_score'].tolist()
            plot_hawkes(n_nodes, A_super, B_super, learnertype, d, score,
                        Dimensions, p1, def_low, def_high)

    elif learnertype == 'HawkesADM4':
        try:
            p = os.path.join(path, 'results', 'tables', learnertype, flavor,
                             str(n_nodes) + '_nodes')
            p1 = os.path.join(path, 'results', 'figs', learnertype, flavor,
                              str(n_nodes) + '_nodes')
            os.makedirs(p)
            os.makedirs(p1)
        except FileExistsError:
            pass
        else:
            print('folders created')

        for d in decay:
            A = []
            B = []
            learner = HawkesADM4(decay=d)
            for i in range(len(df)):
                s1 = df.iloc[i, :len(Dimensions.keys())].tolist()
                learner.fit(s1)
                A.append(learner.adjacency)
                B.append(learner.baseline)
            A_super = pd.DataFrame([list(x) for x in A])
            A_super.columns = list(Dimensions.keys())
            A_super['user_id'] = df['user_id'].tolist()
            A_super['computed_final_score'] = df[
                'computed_final_score'].tolist()
            B_super = pd.DataFrame(list(x) for x in B)
            B_super.columns = list(Dimensions.keys())
            B_super['user_id'] = df['user_id'].tolist()
            B_super['computed_final_score'] = df[
                'computed_final_score'].tolist()
            A_super.to_csv(p + '/A_' + str(d) + '.csv', index=False)
            B_super.to_csv(p + '/B_' + str(d) + '.csv', index=False)
            score = A_super['computed_final_score'].tolist()
            plot_hawkes(n_nodes, A_super, B_super, learnertype, d, score,
                        Dimensions, p1, def_low, def_high)
    else:
        print('function not implemented.')
Example #14
                              p=pHat,
                              Tmax=Tmax)
PermPred = np.argmax(piEstimPErm, axis=1)
ErrorPErm = np.mean(PermPred != Ytest)

#################### PG #####################################################
#############################################################################

paramLS = [None] * Kclass
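# Plug-in ("PG") approach: fit one univariate exponential Hawkes per class on
# that class's training jump times, then score the test sequences under each
# class's estimated (baseline, adjacency, decays) and predict by argmax.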
for k in range(Kclass):
    classk = (np.where(Ytrain == k))
    classk = np.array(classk[0])
    listJumptimesK = [None] * len(classk)
    for i in range(len(classk)):
        listJumptimesK[i] = [listJumptimesTrain[classk[i]]]  # tick format
    learnerLSK = HawkesExpKern(decays=0.5, gofit='least-squares')
    learnerLSK.fit(listJumptimesK)
    paramLS[k] = [
        float(learnerLSK.baseline[0]),
        float(learnerLSK.adjacency[0]), learnerLSK.decays
    ]

piEstimPlugIn = phiFtestim_expo(Jumptimes=listJumptimesTest,
                                Kclass=Kclass,
                                param=paramLS,
                                p=pHat,
                                Tmax=Tmax)
plugInPred = np.argmax(piEstimPlugIn, axis=1)
ErrorPG = np.mean(plugInPred != Ytest)

#################################################################################