Example #1
def test_gibbs_sbm(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 10
    K = 100
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0 / K}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network_hypers=network_hypers)
    # S,R = true_model.generate(T=T)
    c = true_model.network.c
    perm = np.argsort(c)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    network_hypers = {'C': C, 'beta': 1.0 / K}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network_hypers=network_hypers)
    # test_model.add_data(S)

    # Gibbs sample
    N_samples = 10
    samples = []
    lps = []
    for itr in range(N_samples):
        if itr % 5 == 0:
            print("Iteration:", itr)
        samples.append(copy.deepcopy(test_model.get_parameters()))

        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample(
            (true_model.weight_model.A, true_model.weight_model.W))

    plt.ioff()

    # Extract the sampled cluster assignments
    c_samples = np.array([c for _, _, _, _, c, _, _, _ in samples])

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
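
A minimal driver for this example, shown as a sketch only: it assumes the usual imports for these pyhawkes examples (numpy, matplotlib, copy, the model class, plot_network, and the scikit-learn clustering scores) and a fixed seed chosen purely for illustration. The exact pyhawkes module paths are assumptions and may differ between versions.

import copy

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

# Module paths are assumptions; they may differ between pyhawkes versions.
from pyhawkes.models import DiscreteTimeNetworkHawkesModelSpikeAndSlab
from pyhawkes.plotting.plotting import plot_network

if __name__ == "__main__":
    # Run the SBM Gibbs test with a reproducible (illustrative) seed
    test_gibbs_sbm(seed=1234)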
Example #2
def fit_spikeslab_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                       model_args={}, standard_model=None,
                                       N_samples=100, time_limit=8*60*60):

    T,K = S.shape

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from", output_path)
            results = pickle.load(f)
    else:
        print("Fitting the data with a network Hawkes model using Gibbs sampling")

        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, **model_args)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Precompute the basis-convolved test spikes for the heldout likelihood
        F_test = test_model.basis.convolve_with_basis(S_test)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=10):
            # Update the model
            tic = time.time()
            test_model.resample_model()
            samples.append(copy.deepcopy(test_model.get_parameters()))
            times.append(time.time() - tic)

            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test, F=F_test))

            # # Save this sample
            # with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'wb') as f:
            print("Saving Gibbs samples to", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
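
As a usage illustration only, a sketch of calling this fitter on toy binned spike counts: it assumes S is a (T, K) array of counts, that pyhawkes and the helpers referenced above (progprint_xrange, the Results container) are importable, and that the path and hyperparameters below are placeholders.

# Hypothetical driver: toy (T, K) spike counts split into train and test
S_all = np.random.poisson(0.1, size=(2000, 20))
S_train, S_test = S_all[:1600], S_all[1600:]

results = fit_spikeslab_network_hawkes_gibbs(
    S_train, S_test, dt=1.0, dt_max=10.0,
    output_path="results/gibbs_fit.pkl.gz",   # placeholder path
    N_samples=50, time_limit=60 * 60)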
Example #3
def fit_spikeslab_network_hawkes_gibbs(S,
                                       S_test,
                                       dt,
                                       dt_max,
                                       output_path,
                                       model_args={},
                                       standard_model=None,
                                       N_samples=100,
                                       time_limit=8 * 60 * 60):

    T, K = S.shape

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from", output_path)
            results = pickle.load(f)
    else:
        print(
            "Fitting the data with a network Hawkes model using Gibbs sampling"
        )

        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K,
                                                                dt=dt,
                                                                dt_max=dt_max,
                                                                **model_args)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Precompute the basis-convolved test spikes for the heldout likelihood
        F_test = test_model.basis.convolve_with_basis(S_test)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=10):
            # Update the model
            tic = time.time()
            test_model.resample_model()
            samples.append(copy.deepcopy(test_model.get_parameters()))
            times.append(time.time() - tic)

            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test, F=F_test))

            # # Save this sample
            # with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'wb') as f:
            print("Saving Gibbs samples to", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
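
Because the fit is saved as a gzipped pickle, it can be reloaded later without refitting. A sketch, assuming the same placeholder path and that the Results class is importable wherever the file is loaded (pickle needs it to rebuild the object):

import gzip
import pickle

with gzip.open("results/gibbs_fit.pkl.gz", "rb") as f:   # placeholder path
    results = pickle.load(f)

# If Results is a namedtuple-like container, its fields unpack directly
samples, timestamps, lps, hlls = results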
Example #4
def test_gibbs_sbm(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 10
    K = 100
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # S,R = true_model.generate(T=T)
    c = true_model.network.c
    perm = np.argsort(c)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)


    # Make a new model for inference
    network_hypers = {'C': C, 'beta': 1.0/K}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # test_model.add_data(S)

    # Gibbs sample
    N_samples = 10
    samples = []
    lps = []
    for itr in range(N_samples):
        if itr % 5 == 0:
            print("Iteration:", itr)
        samples.append(copy.deepcopy(test_model.get_parameters()))

        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample((true_model.weight_model.A,
                                     true_model.weight_model.W))

    plt.ioff()

    # Extract the sampled cluster assignments
    c_samples = np.array([c for _, _, _, _, c, _, _, _ in samples])

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
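
The clustering scores plotted at the end come from scikit-learn. A small standalone check on toy label vectors: both scores are invariant to relabelling of the clusters, so an exact recovery of the true partition scores 1.0 even when the cluster labels are permuted.

from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

true_c = [0, 0, 1, 1, 2, 2]
est_c = [1, 1, 0, 0, 2, 2]   # same partition, labels permuted

print(adjusted_mutual_info_score(true_c, est_c))   # 1.0
print(adjusted_rand_score(true_c, est_c))          # 1.0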