def test_HawkesExpKern_solver_step(self):
    """...Test that the solver step can be set through HawkesExpKern

    For solvers with a settable step, the value must be readable on both
    the learner and its solver object, at construction and after
    reassignment. For 'bfgs' a warning is expected and the step stays
    ``None``. For 'sgd' a warning is expected when fitting without a step.
    """
    for solver in solvers:
        if solver != 'bfgs':
            model = HawkesExpKern(
                self.decays, solver=solver, step=self.float_1,
                **Test.specific_solver_kwargs(solver))
            # Step given to the constructor
            self.assertEqual(model.step, self.float_1)
            self.assertEqual(model._solver_obj.step, self.float_1)
            # Step reassigned on an existing learner
            model.step = self.float_2
            self.assertEqual(model.step, self.float_2)
            self.assertEqual(model._solver_obj.step, self.float_2)
        else:
            pattern = '^Solver "%s" has no settable step$' % solver
            with self.assertWarnsRegex(RuntimeWarning, pattern):
                model = HawkesExpKern(
                    self.decays, solver=solver, step=1,
                    **Test.specific_solver_kwargs(solver))
                self.assertIsNone(model.step)

        if solver == 'sgd':
            pattern = '^SGD step needs to be tuned manually$'
            with self.assertWarnsRegex(RuntimeWarning, pattern):
                model = HawkesExpKern(self.decays, solver='sgd',
                                      max_iter=1)
                model.fit(self.events, 0.3)
def test_HawkesExpKern_fit(self):
    """...Test HawkesExpKern fit with different solvers and penalties

    For every (gofit, penalty, solver) combination that is not skipped
    below, a learner is fitted for 10 iterations from a constant starting
    point and the resulting adjacency estimation error must be lower than
    80% of the error of that starting point.
    """
    sto_seed = 179312
    n_nodes = 2
    # The baseline returned with the train data is not used by this test
    events, _, adjacency = Test.get_train_data(n_nodes=n_nodes,
                                               betas=self.decays)
    start = 0.3
    initial_adjacency_error = Test.estimation_error(
        start * np.ones((n_nodes, n_nodes)), adjacency)

    # manually set step
    manual_steps = {
        ('sgd', 'likelihood'): 3e-1,
        ('sgd', 'least-squares'): 1e-5,
        ('svrg', 'likelihood'): 1e-3,
    }

    for gofit in gofits:
        for penalty in penalties:
            for solver in solvers:
                # This combination is not exercised by the test
                if solver == 'svrg' and gofit == 'least-squares':
                    continue

                # BFGS only accepts ProxZero and ProxL2sq for now
                if solver == 'bfgs' and penalty != 'l2':
                    continue

                # Nuclear penalty only compatible with batch solvers
                if penalty == 'nuclear' and \
                        solver in HawkesExpKern._solvers_stochastic:
                    continue

                solver_kwargs = {
                    'penalty': penalty,
                    'tol': 1e-10,
                    'solver': solver,
                    'verbose': False,
                    'max_iter': 10,
                    'gofit': gofit,
                }

                if penalty != 'none':
                    solver_kwargs['C'] = 50

                if solver in ['sgd', 'svrg']:
                    solver_kwargs['random_state'] = sto_seed

                if (solver, gofit) in manual_steps:
                    solver_kwargs['step'] = manual_steps[(solver, gofit)]

                learner = HawkesExpKern(self.decays, **solver_kwargs)
                learner.fit(events, start=start)
                adjacency_error = Test.estimation_error(
                    learner.adjacency, adjacency)
                # The compared quantity is the adjacency error, so the
                # failure message names it accordingly.
                self.assertLess(
                    adjacency_error, initial_adjacency_error * 0.8,
                    "solver %s with penalty %s and "
                    "gofit %s reached too high "
                    "adjacency error" % (solver, penalty, gofit))
def __fit_std_Hawkes(dataset_events):
    """Fit a HawkesExpKern learner whose decays are all FITTED_BETA.

    The decay matrix is NUMBER_OF_DIMENSIONS x NUMBER_OF_DIMENSIONS.

    Returns a tuple ``(mus, alphas, betas)`` holding the learner's
    ``baseline``, ``adjacency`` and ``decays`` after fitting on
    ``dataset_events``.
    """
    decay_row = [FITTED_BETA] * NUMBER_OF_DIMENSIONS
    model = HawkesExpKern([decay_row] * NUMBER_OF_DIMENSIONS)
    model.fit(dataset_events)
    return model.baseline, model.adjacency, model.decays
def test_corresponding_simu(self):
    """...Test that the simulation object built from a fitted learner
    carries the same decays, baseline and adjacency as the learner
    """
    fitted = HawkesExpKern(self.decays, max_iter=10)
    fitted.fit(self.events)

    simu = fitted._corresponding_simu()

    self.assertEqual(simu.decays, fitted.decays)
    for attribute in ('baseline', 'adjacency'):
        np.testing.assert_array_equal(getattr(simu, attribute),
                                      getattr(fitted, attribute))
def __fit_current_quarter(dataset_events, model_type):
    """Compute Hawkes parameters for ``dataset_events`` under ``model_type``.

    Supported values of ``model_type``:

    * ``"full"``: parameters of a standard fit (``__fit_std_Hawkes``).
    * ``"baseline"``: no fit; ``mus`` is the event count of each dimension
      divided by ANALYSIS_PERIOD_OFFSET, ``alphas`` is all zeros and
      ``betas`` is all ones.
    * ``"s-e"``: each dimension is fitted on its own with a 1x1 decay of
      FITTED_BETA; only the diagonal of ``alphas`` is filled and ``betas``
      is FITTED_BETA on the diagonal. ``mus`` and ``alphas`` are lists here.
    * ``"reduced"``: a standard fit where the adjacency entries selected by
      the module-level EFFECT_TYPE ("s-e", "late" or "early") are set to 0.

    Returns a tuple ``(mus, alphas, betas)``.

    Raises ValueError (a subclass of the previously raised Exception) for
    an unknown ``model_type`` and, under ``"reduced"``, for an unknown
    EFFECT_TYPE. The latter used to surface as an UnboundLocalError at the
    return statement.
    """
    # fit process to current dataset_events
    if model_type == "full":
        mus, alphas, betas = __fit_std_Hawkes(dataset_events)
    elif model_type == "baseline":
        mus = np.array([len(dim) for dim in dataset_events
                        ]) / ANALYSIS_PERIOD_OFFSET
        alphas = np.zeros((NUMBER_OF_DIMENSIONS, NUMBER_OF_DIMENSIONS))
        betas = np.ones(alphas.shape)
    elif model_type == "s-e":
        mus = []
        alphas = []
        for dim_i in range(NUMBER_OF_DIMENSIONS):
            learner = HawkesExpKern([[FITTED_BETA]])
            learner.fit([dataset_events[dim_i]])
            mus.append(learner.baseline[0].tolist())
            dim_alphas = np.zeros(NUMBER_OF_DIMENSIONS)
            dim_alphas[dim_i] = learner.adjacency[0][0].tolist()
            alphas.append(dim_alphas)
        betas = np.eye(NUMBER_OF_DIMENSIONS) * FITTED_BETA
    elif model_type == "reduced":
        # Resolve which adjacency entries to zero out before fitting, so
        # that an invalid EFFECT_TYPE fails fast with a clear message.
        if EFFECT_TYPE == "s-e":
            zeroed_entries = [(i, i) for i in range(NUMBER_OF_DIMENSIONS)]
        elif EFFECT_TYPE == "late":
            zeroed_entries = [(1, 3), (3, 1)]
        elif EFFECT_TYPE == "early":
            zeroed_entries = [(0, 2), (1, 2), (2, 0), (3, 0), (1, 0)]
        else:
            raise ValueError("unknown EFFECT_TYPE")
        mus, alphas, betas = __fit_std_Hawkes(dataset_events)
        for row, col in zeroed_entries:
            alphas[row][col] = 0
    else:
        raise ValueError("unknown model_type")
    return mus, alphas, betas
def test_HawkesExpKern_score(self):
    """...Test HawkesExpKern score method

    Checks that scoring before any fit raises, then compares the score
    against reference values on train and test events, with both the
    fitted coefficients and explicitly given ones.
    """
    n_nodes = 2
    n_realizations = 3

    def draw_realizations():
        # One list of cumulated random timestamps per node, for each
        # realization; node i gets 4 + i timestamps.
        return [[
            np.cumsum(np.random.rand(4 + node)) for node in range(n_nodes)
        ] for _ in range(n_realizations)]

    train_events = draw_realizations()
    test_events = draw_realizations()

    learner = HawkesExpKern(self.decays)

    pattern = '^You must either call `fit` before `score` or provide events$'
    with self.assertRaisesRegex(ValueError, pattern):
        learner.score()

    given_coeffs = {'baseline': np.random.rand(n_nodes)}
    given_coeffs['adjacency'] = np.random.rand(n_nodes, n_nodes)

    learner.fit(train_events)

    # Train events, fitted coefficients
    self.assertAlmostEqual(learner.score(), 2.0855840)

    # Train events, given coefficients
    self.assertAlmostEqual(learner.score(**given_coeffs), 0.59502417)

    # Test events, fitted coefficients
    self.assertAlmostEqual(learner.score(test_events), 1.6001762)

    # Test events, given coefficients
    self.assertAlmostEqual(learner.score(test_events, **given_coeffs),
                           0.89322199)
def test_HawkesExpKern_fit_start(self):
    """...Test HawkesExpKern starting point of fit method

    The learner is built with ``max_iter=-1`` (do not step), and its
    coefficients after ``fit`` are expected to equal the starting point:
    ones by default, a constant vector for a scalar ``start``, or the
    given array.
    """
    n_nodes = len(self.events)
    n_coefs = n_nodes * (n_nodes + 1)

    # Do not step
    learner = HawkesExpKern(self.decays, max_iter=-1)

    # Default starting point
    learner.fit(self.events)
    np.testing.assert_array_equal(learner.coeffs, np.ones(n_coefs))

    # Scalar starting points, float then int
    for scalar_start in (self.float_1, self.int_1):
        learner.fit(self.events, start=scalar_start)
        np.testing.assert_array_equal(learner.coeffs,
                                      np.ones(n_coefs) * scalar_start)

    # Array starting point
    array_start = np.random.rand(n_coefs)
    learner.fit(self.events, start=array_start)
    np.testing.assert_array_equal(learner.coeffs, array_start)
def __minimize_loglik_in_beta(event_times_dict_list, betas):
    """Return the final solver objective for a fit with one shared decay.

    ``betas["beta"]` is used for every entry of the
    NUMBER_OF_DIMENSIONS x NUMBER_OF_DIMENSIONS decay matrix. After
    fitting on ``event_times_dict_list``, the last value of the ``"obj"``
    entry of the solver history is returned.
    """
    shared_decay_row = [betas["beta"]] * NUMBER_OF_DIMENSIONS
    model = HawkesExpKern([shared_decay_row] * NUMBER_OF_DIMENSIONS)
    model.fit(event_times_dict_list)
    objective_history = model._solver_obj.get_history()["obj"]
    return objective_history[-1]
learner = HawkesExpKern(beta_list) learner.fit(event_times_dict_list) return learner._solver_obj.get_history()["obj"][-1] fitted_betas = [] print("starting beta fit") for iteration_nr in range(BETA_FIT_REPETITIONS): print(" beta iteration nr" + str(iteration_nr)) fitted_beta = hyperopt.fmin( fn=lambda betas: __minimize_loglik_in_beta(EVENT_TIMES, betas), space={"beta": hyperopt.hp.uniform("beta", ZERO, MAXIMUM_BETA)}, algo=hyperopt.tpe.suggest, max_evals=MAX_HYPEROPT_EVALS)["beta"] learner = HawkesExpKern([[fitted_beta] * NUMBER_OF_DIMENSIONS] * NUMBER_OF_DIMENSIONS) learner.fit(EVENT_TIMES) fitted_betas.append({ "beta": fitted_beta, "loglik": learner._solver_obj.get_history()["obj"][-1] }) FITTED_BETA = min(fitted_betas, key=lambda i: i["loglik"])["beta"] else: # beta fitting can be computationally intensive, so here are the results FITTED_BETA = 2.288 if MODE == "GROW_VS_DEC" else ( 2.067 if MODE == "STEM_VS_HUMAN" else "unknown") if FITTED_BETA == "unknown": raise Exception("unknown MODE") print("beta: {}".format(FITTED_BETA))
for dataset_type in DATASET_LIST.keys(): PERIOD_RESULTS = [] for time_span in range(ANALYSIS_PERIOD_START, ANALYSIS_PERIOD_END + 1): print("PROCESSING Q{}".format(time_span)) parameter_results = {"mus": [], "alphas": [], "betas": []} for _ in range(PERMUTED_FITTING_REPETITIONS): __read_dataset_bound_window = functools.partial( __read_dataset_window, window_index=time_span) EVENT_TIMES = POOL.map(__read_dataset_bound_window, DATASET_LIST[dataset_type]) EVENT_TIMES = [ events for events in EVENT_TIMES if events is not None ] learner = HawkesExpKern([[FITTED_BETA] * NUMBER_OF_DIMENSIONS] * NUMBER_OF_DIMENSIONS) learner.fit(EVENT_TIMES) parameter_results["mus"].append( np.array(learner.baseline).tolist()) parameter_results["alphas"].append( (learner.adjacency * np.array(learner.decays)).tolist()) parameter_results["betas"].append( np.array(learner.decays).tolist()) PERIOD_RESULTS.append({ "mu": parameter_results["mus"], "alpha": parameter_results["alphas"], "beta": parameter_results["betas"], "#datasets": len(EVENT_TIMES), "quarter": time_span }) EVENT_TIMES = None with open("quarter_permutation_{}_{}.json".format(dataset_type, FIT_TYPE),