Exemple #1
0
    def test_HawkesExpKern_solver_step(self):
        """...Test that HawkesExpKern forwards its step parameter to the
        solver, and warns when the step cannot be set or was not given
        """
        for solver_name in solvers:
            if solver_name not in ['bfgs']:
                # The step must be mirrored on the underlying solver object,
                # both at construction time and on later assignment
                learner = HawkesExpKern(
                    self.decays, solver=solver_name, step=self.float_1,
                    **Test.specific_solver_kwargs(solver_name))
                self.assertEqual(learner.step, self.float_1)
                self.assertEqual(learner._solver_obj.step, self.float_1)

                learner.step = self.float_2
                self.assertEqual(learner.step, self.float_2)
                self.assertEqual(learner._solver_obj.step, self.float_2)
            else:
                # Passing a step to this solver must warn and leave the
                # learner's step unset
                expected_warning = \
                    '^Solver "%s" has no settable step$' % solver_name
                with self.assertWarnsRegex(RuntimeWarning, expected_warning):
                    learner = HawkesExpKern(
                        self.decays, solver=solver_name, step=1,
                        **Test.specific_solver_kwargs(solver_name))
                    self.assertIsNone(learner.step)

            if solver_name in ['sgd']:
                # Fitting with SGD without giving a step must warn
                expected_warning = '^SGD step needs to be tuned manually$'
                with self.assertWarnsRegex(RuntimeWarning, expected_warning):
                    learner = HawkesExpKern(self.decays, solver='sgd',
                                            max_iter=1)
                    learner.fit(self.events, 0.3)
    def test_HawkesExpKern_fit(self):
        """...Test HawkesExpKern fit with different solvers
        and penalties

        Every tested (gofit, penalty, solver) combination is run with
        max_iter=10 from a constant starting point and must bring the
        adjacency estimation error below 80% of the starting point's error
        """
        sto_seed = 179312
        n_nodes = 2
        # The true baseline is returned as well but this test only checks
        # the adjacency
        events, _, adjacency = Test.get_train_data(n_nodes=n_nodes,
                                                   betas=self.decays)
        start = 0.3
        initial_adjacency_error = \
            Test.estimation_error(start * np.ones((n_nodes, n_nodes)),
                                  adjacency)

        # Steps that are set manually, keyed by (solver, gofit)
        manual_steps = {
            ('sgd', 'likelihood'): 3e-1,
            ('sgd', 'least-squares'): 1e-5,
            ('svrg', 'likelihood'): 1e-3,
        }

        for gofit in gofits:
            for penalty in penalties:
                for solver in solvers:
                    # This combination is left out of the test
                    if solver == 'svrg' and gofit == 'least-squares':
                        continue

                    # BFGS only accepts ProxZero and ProxL2sq for now
                    # NOTE(review): only the 'l2' penalty is exercised with
                    # BFGS, every other penalty is skipped - confirm that
                    # this is intended
                    if solver == 'bfgs' and penalty != 'l2':
                        continue

                    # Nuclear penalty only compatible with batch solvers
                    if penalty == 'nuclear' and \
                            solver in HawkesExpKern._solvers_stochastic:
                        continue

                    solver_kwargs = {'penalty': penalty, 'tol': 1e-10,
                                     'solver': solver, 'verbose': False,
                                     'max_iter': 10, 'gofit': gofit}

                    if penalty != 'none':
                        solver_kwargs['C'] = 50

                    # Stochastic solvers are seeded
                    if solver in ['sgd', 'svrg']:
                        solver_kwargs['random_state'] = sto_seed

                    if (solver, gofit) in manual_steps:
                        solver_kwargs['step'] = manual_steps[(solver, gofit)]

                    learner = HawkesExpKern(self.decays,
                                            **solver_kwargs)
                    learner.fit(events, start=start)
                    adjacency_error = Test.estimation_error(
                        learner.adjacency, adjacency)
                    # The compared quantity is the adjacency error, so the
                    # failure message names it as such
                    self.assertLess(adjacency_error,
                                    initial_adjacency_error * 0.8,
                                    "solver %s with penalty %s and "
                                    "gofit %s reached too high "
                                    "adjacency error" %
                                    (solver, penalty, gofit))
Exemple #3
0
def __fit_std_Hawkes(dataset_events):
    """Fit a HawkesExpKern learner whose decays are all FITTED_BETA.

    The decays are given as a NUMBER_OF_DIMENSIONS x NUMBER_OF_DIMENSIONS
    nested list in which every entry equals FITTED_BETA.

    Returns the tuple (baseline, adjacency, decays) read from the fitted
    learner.
    """
    decay_row = [FITTED_BETA] * NUMBER_OF_DIMENSIONS
    decay_matrix = [decay_row] * NUMBER_OF_DIMENSIONS

    model = HawkesExpKern(decay_matrix)
    model.fit(dataset_events)

    return model.baseline, model.adjacency, model.decays
Exemple #4
0
    def test_corresponding_simu(self):
        """...Test that the simulation object derived from a fitted learner
        carries the same decays, baseline and adjacency as the learner
        """
        fitted = HawkesExpKern(self.decays, max_iter=10)
        fitted.fit(self.events)
        simu = fitted._corresponding_simu()

        self.assertEqual(simu.decays, fitted.decays)
        # Array-valued parameters are compared element-wise
        for attribute in ('baseline', 'adjacency'):
            np.testing.assert_array_equal(getattr(simu, attribute),
                                          getattr(fitted, attribute))
Exemple #5
0
def __fit_current_quarter(dataset_events, model_type):
    """Estimate Hawkes parameters for one set of events.

    Parameters
    ----------
    dataset_events
        Events handed to the learner; for the "baseline" and "s-e" models
        it is indexed per dimension.
    model_type : str
        * "full": standard fit through ``__fit_std_Hawkes``.
        * "baseline": no fit; ``mus`` is the length of each element of
          ``dataset_events`` divided by ANALYSIS_PERIOD_OFFSET, ``alphas``
          is all zeros and ``betas`` all ones.
        * "s-e": every dimension is fitted on its own as a one-dimensional
          process with decay FITTED_BETA; only the diagonal of ``alphas``
          and ``betas`` is non-zero.
        * "reduced": standard fit, after which the adjacency entries
          selected by the module-level EFFECT_TYPE ("s-e", "late" or
          "early") are set to zero.

    Returns
    -------
    (mus, alphas, betas)
        For "s-e", ``mus`` is a list and ``alphas`` a list of arrays; for
        "baseline" all three are numpy arrays; the other model types return
        whatever ``__fit_std_Hawkes`` returns.

    Raises
    ------
    ValueError
        If ``model_type`` is not recognised, or if it is "reduced" and
        EFFECT_TYPE is not recognised.
    """
    if model_type == "full":
        mus, alphas, betas = __fit_std_Hawkes(dataset_events)
    elif model_type == "baseline":
        mus = np.array([len(dim)
                        for dim in dataset_events]) / ANALYSIS_PERIOD_OFFSET
        alphas = np.zeros((NUMBER_OF_DIMENSIONS, NUMBER_OF_DIMENSIONS))
        betas = np.ones(alphas.shape)
    elif model_type == "s-e":
        mus = []
        alphas = []
        for dim_i in range(NUMBER_OF_DIMENSIONS):
            learner = HawkesExpKern([[FITTED_BETA]])
            learner.fit([dataset_events[dim_i]])
            mus.append(learner.baseline[0].tolist())
            # Only the self-interaction of this dimension is estimated
            dim_alphas = np.zeros(NUMBER_OF_DIMENSIONS)
            dim_alphas[dim_i] = learner.adjacency[0][0].tolist()
            alphas.append(dim_alphas)
        betas = np.eye(NUMBER_OF_DIMENSIONS) * FITTED_BETA
    elif model_type == "reduced":
        # Pick the adjacency entries to remove before fitting, so that an
        # unrecognised EFFECT_TYPE fails with a clear error instead of an
        # UnboundLocalError at the return statement.
        if EFFECT_TYPE == "s-e":
            zeroed_entries = [(i, i) for i in range(NUMBER_OF_DIMENSIONS)]
        elif EFFECT_TYPE == "late":
            zeroed_entries = [(1, 3), (3, 1)]
        elif EFFECT_TYPE == "early":
            zeroed_entries = [(0, 2), (1, 2), (2, 0), (3, 0), (1, 0)]
        else:
            raise ValueError("unknown EFFECT_TYPE")
        mus, alphas, betas = __fit_std_Hawkes(dataset_events)
        for row, col in zeroed_entries:
            alphas[row][col] = 0
    else:
        raise ValueError("unknown model_type")
    return mus, alphas, betas
    def test_HawkesExpKern_score(self):
        """...Test HawkesExpKern score method

        Checks that score raises before fit when no events are given, then
        compares the score on train and test events, with fitted and with
        given coefficients, to hard-coded reference values
        """
        # NOTE(review): the reference values below only hold for one exact
        # sequence of random draws - presumably the generator is seeded
        # outside this method (confirm). The statements calling np.random
        # must keep their current order.
        n_nodes = 2
        n_realizations = 3

        # Each realization holds one array per node; node i gets 4 + i
        # timestamps built as the cumulative sum of uniform draws
        train_events = [[
            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)
        ] for _ in range(n_realizations)]

        test_events = [[
            np.cumsum(np.random.rand(4 + i)) for i in range(n_nodes)
        ] for _ in range(n_realizations)]

        learner = HawkesExpKern(self.decays)

        # Scoring an unfitted learner without events must fail
        msg = '^You must either call `fit` before `score` or provide events$'
        with self.assertRaisesRegex(ValueError, msg):
            learner.score()

        # Arbitrary coefficients used to score something else than the fit
        given_baseline = np.random.rand(n_nodes)
        given_adjacency = np.random.rand(n_nodes, n_nodes)

        learner.fit(train_events)

        # No events given: the events passed to fit are scored
        train_score_current_coeffs = learner.score()
        self.assertAlmostEqual(train_score_current_coeffs, 2.0855840)

        train_score_given_coeffs = learner.score(baseline=given_baseline,
                                                 adjacency=given_adjacency)
        self.assertAlmostEqual(train_score_given_coeffs, 0.59502417)

        # Events given: they are scored instead of the fitted ones
        test_score_current_coeffs = learner.score(test_events)
        self.assertAlmostEqual(test_score_current_coeffs, 1.6001762)

        test_score_given_coeffs = learner.score(test_events,
                                                baseline=given_baseline,
                                                adjacency=given_adjacency)
        self.assertAlmostEqual(test_score_given_coeffs, 0.89322199)
Exemple #7
0
    def test_HawkesExpKern_fit_start(self):
        """...Test that HawkesExpKern fit honours the requested starting
        point
        """
        n_nodes = len(self.events)
        # one baseline per node plus one adjacency entry per pair of nodes
        n_coefs = n_nodes * (n_nodes + 1)

        # Do not step: the coefficients read after fit are then compared
        # with the starting point
        learner = HawkesExpKern(self.decays, max_iter=-1)

        # Without an explicit start all coefficients are expected to be one
        learner.fit(self.events)
        np.testing.assert_array_equal(learner.coeffs, np.ones(n_coefs))

        # A scalar start is expected on every coefficient
        for scalar_start in (self.float_1, self.int_1):
            learner.fit(self.events, start=scalar_start)
            np.testing.assert_array_equal(learner.coeffs,
                                          np.ones(n_coefs) * scalar_start)

        # A full vector is expected unchanged
        start_vector = np.random.rand(n_coefs)
        learner.fit(self.events, start=start_vector)
        np.testing.assert_array_equal(learner.coeffs, start_vector)
 def __minimize_loglik_in_beta(event_times_dict_list, betas):
     """Objective function of the beta search.

     Fits a HawkesExpKern learner whose decays all equal ``betas["beta"]``
     on ``event_times_dict_list`` and returns the last value of the "obj"
     entry of the solver history.

     The fit/return statements used to be repeated after the ``return`` at
     a different indentation; that unreachable copy has been dropped.
     """
     beta = betas["beta"]
     # Build independent rows rather than repeating one shared row object
     beta_list = [[beta] * NUMBER_OF_DIMENSIONS
                  for _ in range(NUMBER_OF_DIMENSIONS)]
     learner = HawkesExpKern(beta_list)
     learner.fit(event_times_dict_list)
     return learner._solver_obj.get_history()["obj"][-1]

    # NOTE(review): the statement that opens this branch is not visible in
    # this excerpt; the matching `else` below uses precomputed values.
    # One {"beta", "loglik"} record is collected per repetition of the search.
    fitted_betas = []
    print("starting beta fit")
    for iteration_nr in range(BETA_FIT_REPETITIONS):
        print("    beta iteration nr" + str(iteration_nr))
        # Minimise the objective over a single "beta" drawn uniformly from
        # [ZERO, MAXIMUM_BETA], using hyperopt's TPE algorithm with at most
        # MAX_HYPEROPT_EVALS evaluations; only the best "beta" is kept.
        fitted_beta = hyperopt.fmin(
            fn=lambda betas: __minimize_loglik_in_beta(EVENT_TIMES, betas),
            space={"beta": hyperopt.hp.uniform("beta", ZERO, MAXIMUM_BETA)},
            algo=hyperopt.tpe.suggest,
            max_evals=MAX_HYPEROPT_EVALS)["beta"]
        # Refit with the selected beta to record the last objective value of
        # the solver history next to it.
        learner = HawkesExpKern([[fitted_beta] * NUMBER_OF_DIMENSIONS] *
                                NUMBER_OF_DIMENSIONS)
        learner.fit(EVENT_TIMES)
        fitted_betas.append({
            "beta":
            fitted_beta,
            "loglik":
            learner._solver_obj.get_history()["obj"][-1]
        })
    # Keep the beta whose recorded value is the smallest.
    # NOTE(review): the key is called "loglik" but holds the solver's last
    # objective value - presumably a negative log-likelihood; confirm.
    FITTED_BETA = min(fitted_betas, key=lambda i: i["loglik"])["beta"]
else:  # beta fitting can be computationally intensive, so here are the results
    # Precomputed beta for the two known MODE values; "unknown" is a
    # sentinel that triggers the error just below.
    FITTED_BETA = 2.288 if MODE == "GROW_VS_DEC" else (
        2.067 if MODE == "STEM_VS_HUMAN" else "unknown")
    if FITTED_BETA == "unknown":
        raise Exception("unknown MODE")

print("beta: {}".format(FITTED_BETA))
for dataset_type in DATASET_LIST.keys():
    PERIOD_RESULTS = []
    for time_span in range(ANALYSIS_PERIOD_START, ANALYSIS_PERIOD_END + 1):
        print("PROCESSING Q{}".format(time_span))
        parameter_results = {"mus": [], "alphas": [], "betas": []}
        for _ in range(PERMUTED_FITTING_REPETITIONS):
            __read_dataset_bound_window = functools.partial(
                __read_dataset_window, window_index=time_span)
            EVENT_TIMES = POOL.map(__read_dataset_bound_window,
                                   DATASET_LIST[dataset_type])
            EVENT_TIMES = [
                events for events in EVENT_TIMES if events is not None
            ]
            learner = HawkesExpKern([[FITTED_BETA] * NUMBER_OF_DIMENSIONS] *
                                    NUMBER_OF_DIMENSIONS)
            learner.fit(EVENT_TIMES)
            parameter_results["mus"].append(
                np.array(learner.baseline).tolist())
            parameter_results["alphas"].append(
                (learner.adjacency * np.array(learner.decays)).tolist())
            parameter_results["betas"].append(
                np.array(learner.decays).tolist())
        PERIOD_RESULTS.append({
            "mu": parameter_results["mus"],
            "alpha": parameter_results["alphas"],
            "beta": parameter_results["betas"],
            "#datasets": len(EVENT_TIMES),
            "quarter": time_span
        })
        EVENT_TIMES = None
    with open("quarter_permutation_{}_{}.json".format(dataset_type, FIT_TYPE),