Beispiel #1
0
    dt_max = 3.0
    network_hypers = {'c': np.array([0], dtype=np.int),
                      'p': 0.5, 'kappa': 3.0, 'v': 15.0}
    weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0}
    model = DiscreteTimeNetworkHawkesModelGammaMixture(K=1, dt=dt, dt_max=dt_max,
                                                       weight_hypers=weight_hypers,
                                                       network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)


    # Compute sample statistics for second half of samples
    A_samples       = np.array([s.weight_model.A     for s in samples])
    W_samples       = np.array([s.weight_model.W     for s in samples])
    g_samples       = np.array([s.impulse_model.g    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    c_samples       = np.array([s.network.c          for s in samples])
    p_samples       = np.array([s.network.p          for s in samples])
Beispiel #2
0
def demo(seed=None):
    """
    Fit a weakly sparse
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = cPickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K,
                                                     dt=dt,
                                                     dt_max=dt_max,
                                                     B=B,
                                                     alpha=1.0,
                                                     beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    # plls = []
    for itr in xrange(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = N_samples // 2
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    plt.figure()
    plt.plot(np.arange(N_samples), lps, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")
    plt.show()

    # Compute the link prediction accuracy curves
    auc_init = roc_auc_score(true_model.weight_model.A.ravel(),
                             init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               A_mean.ravel())
    auc_W_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               W_mean.ravel())

    aucs = []
    for A in A_samples:
        aucs.append(roc_auc_score(true_model.weight_model.A.ravel(),
                                  A.ravel()))

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()

    plt.ioff()
    plt.show()
Beispiel #3
0
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples):
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())
        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = N_samples // 2
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
Beispiel #4
0
def demo(seed=None):
    """
    Fit a weakly sparse
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic", "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = cPickle.load(f)

    T      = S.shape[0]
    K      = true_model.K
    B      = true_model.B
    dt     = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len   = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['v'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                            basis_hypers=true_model.basis_hypers,
                                                            bkgd_hypers=true_model.bkgd_hypers,
                                                            impulse_hypers=true_model.impulse_hypers,
                                                            weight_hypers=true_model.weight_hypers,
                                                            network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)


    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    # plls = []
    for itr in xrange(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples       = np.array([s.weight_model.A     for s in samples])
    W_samples       = np.array([s.weight_model.W     for s in samples])
    g_samples       = np.array([s.impulse_model.g    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps             = np.array(lps)

    offset = N_samples // 2
    A_mean       = A_samples[offset:, ...].mean(axis=0)
    W_mean       = W_samples[offset:, ...].mean(axis=0)
    g_mean       = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    plt.figure()
    plt.plot(np.arange(N_samples), lps, 'k')
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")
    plt.show()

    # Compute the link prediction accuracy curves
    auc_init = roc_auc_score(true_model.weight_model.A.ravel(),
                             init_model.W.ravel())
    auc_A_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               A_mean.ravel())
    auc_W_mean = roc_auc_score(true_model.weight_model.A.ravel(),
                               W_mean.ravel())

    aucs = []
    for A in A_samples:
        aucs.append(roc_auc_score(true_model.weight_model.A.ravel(), A.ravel()))

    plt.figure()
    plt.plot(aucs, '-r')
    plt.plot(auc_A_mean * np.ones_like(aucs), '--r')
    plt.plot(auc_W_mean * np.ones_like(aucs), '--b')
    plt.plot(auc_init * np.ones_like(aucs), '--k')
    plt.xlabel("Iteration")
    plt.ylabel("Link prediction AUC")
    plt.show()


    plt.ioff()
    plt.show()
    weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0}
    model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=1,
        dt=dt,
        dt_max=dt_max,
        weight_hypers=weight_hypers,
        network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    c_samples = np.array([s.network.c for s in samples])
    p_samples = np.array([s.network.p for s in samples])
    v_samples = np.array([s.network.v for s in samples])
Beispiel #6
0
def geweke_test():
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {
        'c': np.array([0], dtype=np.int),
        'p': 0.5,
        'kappa': 3.0,
        'v': 15.0
    }
    model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=1, dt=dt, dt_max=dt_max, network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in xrange(N_samples):
        if itr % 10 == 0:
            print "Iteration: ", itr
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    c_samples = np.array([s.network.c for s in samples])
    p_samples = np.array([s.network.p for s in samples])
    v_samples = np.array([s.network.v for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print "A mean:        ", A_mean
    print "W mean:        ", W_mean
    print "g mean:        ", g_mean
    print "lambda0 mean:  ", lambda0_mean

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1. / model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0],
                          bins=20,
                          alpha=0.5,
                          normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('lam0')
    plt.ylabel('p(lam0)')

    print "Expected p(A):  ", model.network.P
    print "Empirical p(A): ", A_samples.mean(axis=0)

    # Plot the histogram of weight samples
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    p_W1 = gamma(model.network.kappa, scale=1. / model.network.v[0, 0])
    _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0],
                          bins=20,
                          alpha=0.5,
                          normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('W')
    plt.ylabel('p(W | A=1)')

    plt.figure()
    Aeq0 = A_samples[:, 0, 0] == 0
    p_W1 = gamma(model.weight_model.kappa_0,
                 scale=1. / model.weight_model.nu_0)
    _, bins, _ = plt.hist(W_samples[Aeq0, 0, 0],
                          bins=20,
                          alpha=0.5,
                          normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('W')
    plt.ylabel('p(W | A=0)')

    # Plot the histogram of impulse samples
    plt.figure()
    for b in range(model.B):
        plt.subplot(1, model.B, b + 1)
        a = model.impulse_model.gamma[b]
        b = model.impulse_model.gamma.sum() - a
        p_beta11b = beta(a, b)

        _, bins, _ = plt.hist(g_samples[:, 0, 0, b],
                              bins=20,
                              alpha=0.5,
                              normed=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, p_beta11b.pdf(bincenters), 'r--', linewidth=1)
        plt.xlabel('g_%d' % b)
        plt.ylabel('p(g_%d)' % b)

    plt.show()
Beispiel #7
0
    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)


    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples):
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())
        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples       = np.array([s.weight_model.A     for s in samples])
    W_samples       = np.array([s.weight_model.W     for s in samples])
    g_samples       = np.array([s.impulse_model.g    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps             = np.array(lps)

    offset = N_samples // 2
    A_mean       = A_samples[offset:, ...].mean(axis=0)
    W_mean       = W_samples[offset:, ...].mean(axis=0)
    g_mean       = g_samples[offset:, ...].mean(axis=0)
Beispiel #8
0
def demo(seed=None):
    """
    Fit a weakly sparse
    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = cPickle.load(f)

    T = S.shape[0]
    K = true_model.K
    B = true_model.B
    dt = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K,
                                                     dt=dt,
                                                     dt_max=dt_max,
                                                     B=B,
                                                     alpha=1.0,
                                                     beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test weak spike-and-slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['c'] = None
    network_hypers['v'] = None
    network_hypers['m'] = None
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
        K=K,
        dt=dt,
        dt_max=dt_max,
        B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network_hypers=network_hypers)
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    # plls = []
    for itr in xrange(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

        # Update plot
        if itr % 1 == 0:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, init_model, samples, lps)