コード例 #1
0
def fit_ct_network_hawkes_gibbs(S,
                                S_test,
                                dt,
                                dt_max,
                                output_path,
                                model_args=None,
                                standard_model=None,
                                N_samples=100,
                                time_limit=8 * 60 * 60):
    """
    Fit a continuous time network Hawkes model with Gibbs sampling.

    Results are cached on disk: if ``output_path`` already exists, the gzipped
    pickle stored there is loaded and returned and no sampling is performed.
    Otherwise a ``ContinuousTimeNetworkHawkesModel`` is built, sampled, and
    the results are written to ``output_path`` before being returned.

    :param S: training data; ``S.shape[1]`` is used as the number of
        processes ``K`` and ``S`` is handed to
        ``convert_discrete_to_continuous`` together with ``dt``.
    :param S_test: held-out data, converted the same way as ``S``.
    :param dt: second argument of ``convert_discrete_to_continuous``
        (presumably the bin width of the discrete data -- confirm there).
    :param dt_max: forwarded to the model constructor as ``dt_max``.
    :param output_path: path of the gzipped pickle used as the cache.
    :param model_args: optional dict of extra keyword arguments for the model
        constructor; ``None`` (the default) means no extra arguments.
    :param standard_model: if not ``None``, passed to
        ``initialize_with_standard_model`` before sampling starts.
    :param N_samples: maximum number of sampling iterations.
    :param time_limit: cumulative resampling time, in seconds as measured by
        ``time.time()``, after which the loop stops early.
    :return: the loaded or newly built ``Results(samples, timestamps, lps,
        hlls)`` object.
    """
    # Avoid a shared mutable default: build a fresh dict per call.
    if model_args is None:
        model_args = {}

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        # NOTE: unpickling executes code stored in the file, so output_path
        # must only ever point at a trusted cache.
        with gzip.open(output_path, 'r') as f:
            print("Loading Gibbs results from ", output_path)
            results = pickle.load(f)
    else:
        print(
            "Fitting the data with a continuous time network Hawkes model using Gibbs sampling"
        )

        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample. Index 0 of lps/hlls/times describes the initial state.
        samples = []
        lps = [test_model.log_probability()]
        hlls = [
            test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)
        ]
        times = [0]
        elapsed = 0.0
        for _ in progprint_xrange(N_samples, perline=25):
            # Update the model, timing only the resampling step
            tic = time.time()
            test_model.resample_model()
            step_time = time.time() - tic
            times.append(step_time)
            elapsed += step_time

            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Only the heldout log likelihood is tracked per iteration; the
            # log probability is not recomputed here, so lps keeps just its
            # initial entry.
            hlls.append(
                test_model.heldout_log_likelihood(S_test_ct, C_test_ct,
                                                  T_test))

            # Check if time limit has been exceeded (running total instead of
            # re-summing the whole list on every iteration)
            if elapsed > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'w') as f:
            print("Saving Gibbs samples to ", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
コード例 #2
0
ファイル: harness.py プロジェクト: PerryZh/pyhawkes
def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                model_args={}, standard_model=None,
                                N_samples=100, time_limit=8*60*60):

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print "Loading Gibbs results from ", output_path
            results = cPickle.load(f)
    else:
        print "Fitting the data with a continuous time network Hawkes model using Gibbs sampling"

        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=25):
            # Update the model
            tic = time.time()
            test_model.resample_model()
            times.append(time.time() - tic)

            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test))

            # # Save this sample
            # with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'w') as f:
            print "Saving Gibbs samples to ", output_path
            cPickle.dump(results, f, protocol=-1)

    return results
コード例 #3
0
# Fit one model per candidate network, reusing a pickled result when one
# already exists on disk for that network name.
results_dir = os.path.join("results", "hippocampus", "run002")
for network, name in zip(networks, names):
    results_file = os.path.join(results_dir, "%s.pkl" % name)
    if os.path.exists(results_file):
        # Pickle data is a byte stream: the file has to be opened in binary
        # mode, otherwise pickle.load fails under Python 3.
        with open(results_file, "rb") as f:
            result = pickle.load(f)
            results.append(result)
        continue

    print("Fitting model with ", name, " network.")
    model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=1.,
        network=network)

    model.add_data(S_train, C_train, T_train)
    model.resample_model()

    # Add the test data and then remove it. That way we can
    # efficiently compute its predictive log likelihood
    model.add_data(S_test, C_test, T - T_train)
    test_data = model.data_list.pop()

    ### Fit the model
    # Index 0 of lls/plls holds the values after the single warm-up
    # resampling step above.
    lls = [model.log_likelihood()]
    plls = [model.log_likelihood(test_data)]
    Weffs = []
    Ps = []
    Ls = []

    for iter in progprint_xrange(N_samples, perline=25):
        model.resample_model()
コード例 #4
0
def test_geweke():
    """
    Geweke-style check of the continuous time network Hawkes Gibbs sampler.

    Builds a single-process ``ContinuousTimeNetworkHawkesModel``, then
    repeatedly resamples the model and replaces its dataset with a freshly
    generated one. Afterwards the sample means are printed and histograms of
    the sampled parameters are plotted against reference densities built from
    the model's hyperparameters (presumably the priors -- confirm against the
    model classes).

    :return: None
    """
    K = 1
    T = 50.0
    dt_max = 3.0
    # np.int was only an alias of the builtin int and has been removed from
    # recent NumPy releases, so the builtin is used directly.
    network_hypers = {'c': np.zeros(K, dtype=int), 'p': 0.5, 'kappa': 10.0, 'v': 10*3.0}
    model = ContinuousTimeNetworkHawkesModel(K=K, dt_max=dt_max,
                                             network_hypers=network_hypers)

    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 1000
    samples = []
    lps = []
    for _ in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step: discard the last dataset and generate a new one
        model.data_list.pop()
        model.generate(T=T)

    # Stack the per-sample parameters along a leading sample axis
    A_samples       = np.array([s.weight_model.A       for s in samples])
    W_samples       = np.array([s.weight_model.W       for s in samples])
    mu_samples      = np.array([s.impulse_model.mu     for s in samples])
    tau_samples     = np.array([s.impulse_model.tau    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0   for s in samples])
    lps             = np.array(lps)

    # offset = 0 keeps every sample; raise it to drop initial samples
    offset = 0
    A_mean       = A_samples[offset:, ...].mean(axis=0)
    W_mean       = W_samples[offset:, ...].mean(axis=0)
    mu_mean      = mu_samples[offset:, ...].mean(axis=0)
    tau_mean     = tau_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print("A mean:        ", A_mean)
    print("W mean:        ", W_mean)
    print("mu mean:       ", mu_mean)
    print("tau mean:      ", tau_mean)
    print("lambda0 mean:  ", lambda0_mean)

    # Plot the log likelihood trace over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples.
    # density=True replaces the `normed` keyword, which current matplotlib
    # releases no longer accept.
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1./model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:,0], bins=50, alpha=0.5, density=True)
    bincenters = 0.5*(bins[1:]+bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('lam0')
    plt.ylabel('p(lam0)')

    print("Expected p(A):  ", model.network.P)
    print("Empirical p(A): ", A_samples.mean(axis=0))

    # Plot the histogram of weight samples, restricted to the samples in
    # which the (0, 0) entry of A equals 1
    plt.figure()
    Aeq1 = A_samples[:,0,0] == 1
    p_W1 = gamma(model.network.kappa, scale=1./model.network.v[0,0])

    # Skip the histogram when no sample has A[0, 0] == 1 (the figure created
    # above then stays empty)
    if np.sum(Aeq1) > 0:
        _, bins, _ = plt.hist(W_samples[Aeq1,0,0], bins=50, alpha=0.5, density=True)
        bincenters = 0.5*(bins[1:]+bins[:-1])
        plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
        plt.xlabel('W')
        plt.ylabel('p(W | A=1)')

    # Plot the histogram of impulse precisions
    plt.figure()
    p_tau = gamma(model.impulse_model.alpha_0, scale=1./model.impulse_model.beta_0)

    _, bins, _ = plt.hist(tau_samples[:,0,0], bins=50, alpha=0.5, density=True)
    bincenters = 0.5*(bins[1:]+bins[:-1])
    plt.plot(bincenters, p_tau.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('tau')
    plt.ylabel('p(tau)')

    # Plot the histogram of impulse means
    plt.figure()
    p_mu = t(df=2*model.impulse_model.alpha_0,
             loc=model.impulse_model.mu_0,
             scale=np.sqrt(model.impulse_model.beta_0/(model.impulse_model.alpha_0*model.impulse_model.lmbda_0)))

    _, bins, _ = plt.hist(mu_samples[:,0,0], bins=50, alpha=0.5, density=True)
    bincenters = 0.5*(bins[1:]+bins[:-1])
    plt.plot(bincenters, p_mu.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('mu')
    plt.ylabel('p(mu)')

    plt.show()
コード例 #5
0
ファイル: hippocampus.py プロジェクト: amagnosousa/pyhawkes
results_dir = os.path.join("results", "hippocampus", "run002")
for network, name in zip(networks, names):
    results_file = os.path.join(results_dir, "%s.pkl" % name)
    if os.path.exists(results_file):
        with open(results_file, "r") as f:
            result = cPickle.load(f)
            results.append(result)
        continue

    print "Fitting model with ", name, " network."
    model = ContinuousTimeNetworkHawkesModel(
        K, dt_max=1.,
        network=network)

    model.add_data(S_train, C_train, T_train)
    model.resample_model()

    # Add the test data and then remove it. That way we can
    # efficiently compute its predictive log likelihood
    model.add_data(S_test, C_test, T - T_train)
    test_data = model.data_list.pop()

    ### Fit the model
    lls = [model.log_likelihood()]
    plls = [model.log_likelihood(test_data)]
    Weffs = []
    Ps = []
    Ls = []

    for iter in progprint_xrange(N_samples, perline=25):
        model.resample_model()
コード例 #6
0
ファイル: geweke_ct_test.py プロジェクト: razhangwei/pyhawkes
def test_geweke():
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    K = 1
    T = 50.0
    dt = 1.0
    dt_max = 3.0
    # network_hypers = {'C': 1, 'p': 0.5, 'kappa': 3.0, 'alpha': 3.0, 'beta': 1.0/20.0}
    network_hypers = {"c": np.zeros(K, dtype=np.int), "p": 0.5, "kappa": 10.0, "v": 10 * 3.0}
    bkgd_hypers = {"alpha": 1.0, "beta": 10.0}
    model = ContinuousTimeNetworkHawkesModel(K=K, dt_max=dt_max, network_hypers=network_hypers)

    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 1000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    mu_samples = np.array([s.impulse_model.mu for s in samples])
    tau_samples = np.array([s.impulse_model.tau for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    mu_mean = mu_samples[offset:, ...].mean(axis=0)
    tau_mean = tau_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print "A mean:        ", A_mean
    print "W mean:        ", W_mean
    print "mu mean:       ", mu_mean
    print "tau mean:      ", tau_mean
    print "lambda0 mean:  ", lambda0_mean

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1.0 / model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0], bins=50, alpha=0.5, normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel("lam0")
    plt.ylabel("p(lam0)")

    print "Expected p(A):  ", model.network.P
    print "Empirical p(A): ", A_samples.mean(axis=0)

    # Plot the histogram of weight samples
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    # p_W1 = gamma(model.network.kappa, scale=1./model.network.v[0,0])

    # p_W1 = betaprime(model.network.kappa, model.network.alpha, scale=model.network.beta)
    p_W1 = gamma(model.network.kappa, scale=1.0 / model.network.v[0, 0])

    if np.sum(Aeq1) > 0:
        _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0], bins=50, alpha=0.5, normed=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, p_W1.pdf(bincenters), "r--", linewidth=1)
        plt.xlabel("W")
        plt.ylabel("p(W | A=1)")

    # Plot the histogram of impulse precisions
    plt.figure()
    p_tau = gamma(model.impulse_model.alpha_0, scale=1.0 / model.impulse_model.beta_0)

    _, bins, _ = plt.hist(tau_samples[:, 0, 0], bins=50, alpha=0.5, normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_tau.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel("tau")
    plt.ylabel("p(tau)")

    # Plot the histogram of impulse means
    plt.figure()
    p_mu = t(
        df=2 * model.impulse_model.alpha_0,
        loc=model.impulse_model.mu_0,
        scale=np.sqrt(model.impulse_model.beta_0 / (model.impulse_model.alpha_0 * model.impulse_model.lmbda_0)),
    )

    _, bins, _ = plt.hist(mu_samples[:, 0, 0], bins=50, alpha=0.5, normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_mu.pdf(bincenters), "r--", linewidth=1)
    plt.xlabel("mu")
    plt.ylabel("p(mu)")

    plt.show()