Example #1
def initialize_plots(true_model, test_model, S):
    K = true_model.K
    C = true_model.C
    R = true_model.compute_rate(S=S)
    T = S.shape[0]
    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A, true_model.weight_model.W)
    plt.pause(0.001)

    # Plot the true and inferred firing rate
    plt.figure(2)
    plt.plot(np.arange(T), R[:, 0], "-k", lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate()[:, 0], "-r")[0]
    plt.show()

    # Plot the block affiliations
    plt.figure(3)
    KC = np.zeros((K, C))
    KC[np.arange(K), test_model.network.c] = 1.0
    im_clus = plt.imshow(KC, interpolation="none", cmap="Greys", aspect=float(C) / K)

    im_net = plot_network(np.ones((K, K)), test_model.weight_model.W_effective, vmax=0.5)
    plt.pause(0.001)

    plt.show()
    plt.pause(0.001)

    return ln, im_net, im_clus
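
A minimal sketch of how this helper might be driven from an inference loop. It assumes the pyhawkes model pair used throughout these examples and reuses only calls that appear elsewhere in this listing (resample_and_copy, compute_rate, W_effective); the loop itself is illustrative, not part of the original example.

# Hypothetical driver loop (sketch): refresh the returned
# artists after each inference sweep.
ln, im_net, im_clus = initialize_plots(true_model, test_model, S)
for itr in range(100):
    test_model.resample_and_copy()                 # one Gibbs sweep (API as used below)
    ln.set_data(np.arange(S.shape[0]),
                test_model.compute_rate()[:, 0])   # update the inferred-rate line
    im_net.set_data(test_model.weight_model.W_effective)
    plt.pause(0.001)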
Example #2
def initialize_plots(true_model, test_model, S):
    K = true_model.K
    C = true_model.C
    R = true_model.compute_rate(S=S)
    T = S.shape[0]
    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A, true_model.weight_model.W)
    plt.pause(0.001)

    # Plot the true and inferred firing rate
    plt.figure(2)
    plt.plot(np.arange(T), R[:, 0], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate()[:, 0], '-r')[0]
    plt.show()

    # Plot the block affiliations
    plt.figure(3)
    KC = np.zeros((K, C))
    KC[np.arange(K), test_model.network.c] = 1.0
    im_clus = plt.imshow(KC,
                         interpolation="none",
                         cmap="Greys",
                         aspect=float(C) / K)

    im_net = plot_network(np.ones((K, K)),
                          test_model.weight_model.W_effective,
                          vmax=0.5)
    plt.pause(0.001)

    plt.show()
    plt.pause(0.001)

    return ln, im_net, im_clus
def fit_standard_hawkes_model_sgd(S, K, B, dt, dt_max, init_model=None):
    """
    Fit
    :param S:
    :return:
    """
    print("Fitting the data with a standard Hawkes model using SGD")

    # Make a new model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K,
                                                 dt=dt,
                                                 dt_max=dt_max,
                                                 B=B)
    test_model.add_data(S, minibatchsize=256)

    # Initialize the test model with the init model weights
    if init_model is not None:
        test_model.weights = init_model.weights

    plt.ion()
    im = plot_network(np.ones((K, K)), test_model.W, vmax=0.5)
    plt.pause(0.001)

    # Gradient descent
    N_steps = 1000
    samples = []
    lls = []
    timestamps = []

    learning_rate = 0.01 * np.ones(N_steps)
    momentum = 0.8 * np.ones(N_steps)
    prev_velocity = None
    for itr in range(N_steps):
        # W,ll,grad = test_model.gradient_descent_step(stepsz=0.001)
        W, ll, prev_velocity = test_model.sgd_step(prev_velocity,
                                                   learning_rate[itr],
                                                   momentum[itr])
        samples.append(test_model.copy_sample())
        lls.append(ll)
        timestamps.append(time.time())

        if itr % 1 == 0:
            print("Iteration ", itr, "\t LL: ", ll)
            im.set_data(np.ones((K, K)) * test_model.W)
            plt.pause(0.001)

    plt.ioff()
    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K, K)), test_model.W)
    plt.show()

    return samples, timestamps
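
For reference, the velocity recursion that sgd_step is presumed to implement here is classical momentum: v <- m*v - lr*grad(w), then w <- w + v. A self-contained sketch on a toy objective; momentum_sgd_step and grad_fn are illustrative names, not pyhawkes API.

import numpy as np

def momentum_sgd_step(w, velocity, grad_fn, learning_rate, momentum):
    # Classical momentum: v <- m*v - lr*grad(w); w <- w + v
    velocity = momentum * velocity - learning_rate * grad_fn(w)
    return w + velocity, velocity

# Toy usage: minimize f(w) = 0.5*||w||^2, whose gradient is w itself
w, v = np.ones(3), np.zeros(3)
for _ in range(500):
    w, v = momentum_sgd_step(w, v, lambda w: w, 0.01, 0.8)
print(w)  # close to the minimizer at zero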
def demo(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 1       # Number of clusters in the true data
    K = 10      # Number of nodes
    T = 1000    # Number of time bins to simulate
    dt = 0.02   # Time bin size
    dt_max = 0.08
    B = 3       # Number of basis functions

    # Sample from a sparse network Hawkes model
    S, true_model = sample_from_network_hawkes(C, K, T, dt, dt_max, B)

    # Make a new model for inference
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=False)
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max+dt,
                                                 beta=1.0,
                                                 basis=test_basis,
                                                 allow_self_connections=True)
    test_model.add_data(S)

    # DEBUG: Initialize with the true parameters of the network Hawkes model
    # test_model.initialize_with_gibbs_model(true_model)

    test_model.fit_with_bfgs()

    print "W true:        ", true_model.weight_model.A * true_model.weight_model.W
    print "lambda0 true:  ", true_model.bias_model.lambda0
    print "ll true:       ", true_model.log_likelihood()
    print ""
    print "W test:        ", test_model.W
    print "lambda0 test   ", test_model.bias
    print "ll test:       ", test_model.log_likelihood()

    plot_network(np.ones((K,K)), test_model.W, vmax=0.5)

    # Plot the rates
    plt.figure()
    for k in range(3):
        plt.subplot(3,1,k+1)
        plt.plot(np.arange(T) * dt, true_model.compute_rate(proc=k), '-b')
        plt.plot(np.arange(T) * dt, test_model.compute_rate(ks=k), '-r')

    plt.ioff()
    plt.show()
def fit_standard_hawkes_model_sgd(S, K, B, dt, dt_max, init_model=None):
    """
    Fit
    :param S:
    :return:
    """
    print "Fitting the data with a standard Hawkes model using SGD"

    # Make a new model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B)
    test_model.add_data(S, minibatchsize=256)

    # Initialize the test model with the init model weights
    if init_model is not None:
        test_model.weights = init_model.weights

    plt.ion()
    im = plot_network(np.ones((K,K)), test_model.W, vmax=0.5)
    plt.pause(0.001)

    # Gradient descent
    N_steps = 1000
    samples = []
    lls = []
    timestamps = []

    learning_rate = 0.01 * np.ones(N_steps)
    momentum = 0.8 * np.ones(N_steps)
    prev_velocity = None
    for itr in range(N_steps):
        # W,ll,grad = test_model.gradient_descent_step(stepsz=0.001)
        W, ll, prev_velocity = test_model.sgd_step(prev_velocity, learning_rate[itr], momentum[itr])
        samples.append(test_model.copy_sample())
        lls.append(ll)
        timestamps.append(time.time())

        if itr % 1 == 0:
            print("Iteration ", itr, "\t LL: ", ll)
            im.set_data(np.ones((K,K)) * test_model.W)
            plt.pause(0.001)

    plt.ioff()
    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K,K)), test_model.W)
    plt.show()

    return samples, timestamps
def fit_network_hawkes_gibbs(S, K, C, dt, dt_max,
                             output_path,
                             standard_model=None):

    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", 'rb') as f:
            print("Loading Gibbs results from ", (output_path + ".gibbs.pkl"))
            (samples, timestamps) = pickle.load(f)

    else:
        print("Fitting the data with a network Hawkes model using Gibbs sampling")

        # Make a new model for inference
        # test_model = DiscreteTimeNetworkHawkesModelGammaMixture(C=C, K=K, dt=dt, dt_max=dt_max, B=B,
        #                                                         alpha=1.0, beta=1.0/20.0)
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/10.0,
                          'tau1': 1.0, 'tau0': 10.0,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max,
                                                                basis=test_basis,
                                                                network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in range(N_samples):
            if itr % 1 == 0:
                print("Iteration ", itr, "\tLL: ", lps[-1])
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
def fit_network_hawkes_svi(S, K, C, B, dt, dt_max,
                           output_path,
                           standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="svi2")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    else:
        print "Fitting the data with a network Hawkes model using SVI"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        test_model.add_data(S)


        # Stochastic variational inference
        N_iters = 10000
        samples = []
        delay = 1.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        start = time.time()
        timestamps = []
        for itr in range(N_iters):
            print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1] -start), f, protocol=-1)

        # Save the SVI samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
            print("Saving SVI samples to ", (output_path + ".svi.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
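
The step sizes above follow the standard Robbins-Monro schedule, stepsize_t = (t + delay)^(-forgetting_rate). For stochastic-approximation guarantees the exponent should lie in (0.5, 1]; this example sits at the boundary value 0.5. A quick way to inspect the schedule:

import numpy as np
import matplotlib.pyplot as plt

delay, forgetting_rate, N_iters = 1.0, 0.5, 10000
stepsize = (np.arange(N_iters) + delay) ** (-forgetting_rate)

plt.semilogy(stepsize)
plt.xlabel("SVI iteration")
plt.ylabel("Step size")
plt.show()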
Example #8
def sample_from_network_hawkes(C, K, T, dt, B):
    # Create a true model
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1-np.eye(C))
    c = (0.0 * (np.arange(K) < 10) + 1.0 * (np.arange(K) >= 10)).astype(int)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C, K=K, dt=dt, B=B, c=c, p=p, v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)

    # Sample from the true model
    S, R = true_model.generate(T=T)

    # Return the spike counts, rates, and the true model
    return S, R, true_model
def sample_from_network_hawkes(C, K, T, dt, dt_max, B):
    # Create a true model
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1-np.eye(C))
    c = (0.0 * (np.arange(K) < 10) + 1.0 * (np.arange(K) >= 10)).astype(int)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C, K=K, dt=dt, dt_max=dt_max,
                                                            B=B, c=c, p=p, v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)

    # Sample from the true model
    S, R = true_model.generate(T=T)

    # Return the spike counts and the true model
    return S, true_model
Example #10
def initialize_plots(true_model, test_model, S):
    K = true_model.K
    C = true_model.C
    R = true_model.compute_rate(S=S)
    T = S.shape[0]

    # Plot the true network
    plt.ion()
    im_net = plot_network(true_model.weight_model.A, true_model.weight_model.W)
    plt.pause(0.001)

    # Plot the true and inferred firing rate
    plt.figure(2)
    plt.plot(np.arange(T), R[:, 0], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate()[:, 0], '-r')[0]
    plt.show()
    plt.pause(0.001)

    return ln, im_net
Example #11
def initialize_plots(true_model, test_model, S):
    K = true_model.K
    C = true_model.C
    R = true_model.compute_rate(S=S)
    T = S.shape[0]

    # Plot the true network
    plt.ion()
    im_net = plot_network(true_model.weight_model.A,
                          true_model.weight_model.W)
    plt.pause(0.001)


    # Plot the true and inferred firing rate
    plt.figure(2)
    plt.plot(np.arange(T), R[:,0], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate()[:,0], '-r')[0]
    plt.show()
    plt.pause(0.001)

    return ln, im_net
Example #12
def demo(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 1
    K = 10
    T = 1000
    dt = 1.0
    B = 3

    # Create a true model
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1-np.eye(C))
    # m = 0.5 * np.ones(C)
    c = (0.0 * (np.arange(K) < 10) + 1.0 * (np.arange(K) >= 10)).astype(int)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C, K=K, dt=dt, B=B, c=c, p=p, v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T)


    # Make a new model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    test_model.add_data(S)

    # Plot the true and inferred firing rate
    kplt = 0
    plt.figure()
    plt.plot(np.arange(T), R[:,kplt], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate(ks=kplt), '-r')[0]
    plt.show()

    # Gradient descent
    N_steps = 10000
    lls = []
    for itr in range(N_steps):
        W, ll, grad = test_model.gradient_descent_step(stepsz=0.001)
        lls.append(ll)

        # Update plot
        if itr % 5 == 0:
            ln.set_data(np.arange(T), test_model.compute_rate(ks=kplt))
            plt.title("Iteration %d" % itr)
            plt.pause(0.001)

    plt.ioff()

    print "W true:        ", true_model.weight_model.A * true_model.weight_model.W
    print "lambda0 true:  ", true_model.bias_model.lambda0
    print "ll true:       ", true_model.log_likelihood()
    print ""
    print "W test:        ", test_model.W
    print "lambda0 test   ", test_model.bias
    print "ll test:       ", test_model.log_likelihood()


    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K,K)), test_model.W, vmax=0.5)
    plt.show()
Example #13
def test_sbm_mf(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 5
    K = 100
    T = 1000
    dt = 1.0
    B = 3
    p = 0.4 * np.eye(C) + (0.05) * (1-np.eye(C))

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K, 'p': p}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    c = true_model.network.c
    perm = np.argsort(c)
    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    test_network_hypers = {'C': C, 'beta': 1.0/K, 'tau0': 0.5, 'tau1': 0.5}
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, B=B,
                                                            network_hypers=test_network_hypers)
    test_model.weight_model.initialize_from_gibbs(true_model.weight_model.A,
                                                  true_model.weight_model.W)

    # Plot the block probabilities
    plt.figure()
    im = plt.imshow(test_model.network.mf_m[perm,:],
                    interpolation="none", cmap="Greys",
                    aspect=float(C)/K)
    plt.xlabel('C')
    plt.ylabel('K')
    plt.show()
    plt.pause(0.001)

    # Run mean field updates for the SBM given a fixed network
    N_iters = 20
    c_samples = []
    vlbs = []
    for itr in range(N_iters):
        if itr % 5 == 0:
            print("Iteration: ", itr)

        # Update the plot
        im.set_data(test_model.network.mf_m[perm,:])
        plt.pause(0.001)


        # Resample from meanfield distribution
        test_model.network.resample_from_mf()
        c_samples.append(copy.deepcopy(test_model.network.c))
        vlbs.append(test_model.network.get_vlb() + test_model.weight_model.get_vlb())

        if itr > 0:
            if vlbs[-1] - vlbs[-2] < -1e-3:
                print("VLBs are not increasing")
                print(np.array(vlbs))
                raise Exception("VLBs are not increasing!")


        # Take a mean field step
        test_model.network.meanfieldupdate(test_model.weight_model)

    plt.ioff()

    # Compute sample statistics for second half of samples
    c_samples = np.array(c_samples)
    vlbs = np.array(vlbs)

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_iters), amis, '-r')
    plt.plot(np.arange(N_iters), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")

    plt.figure()
    plt.plot(np.arange(N_iters), vlbs)
    plt.xlabel("Iteration")
    plt.ylabel("VLB")

    plt.show()
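
The two curves plotted above are scikit-learn's adjusted mutual information and adjusted Rand scores; both compare an inferred block assignment to the true one and are invariant to relabeling of the clusters. A standalone check with made-up labels:

from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

c_true = [0, 0, 1, 1, 2, 2]
c_test = [1, 1, 0, 0, 2, 2]  # same partition, permuted labels
print(adjusted_mutual_info_score(c_true, c_test))  # 1.0
print(adjusted_rand_score(c_true, c_test))         # 1.0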
Example #14
def test_sbm_mf(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 5
    K = 50
    T = 1000
    dt = 1.0
    B = 3
    p = 0.4 * np.eye(C) + (0.05) * (1-np.eye(C))

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K, 'p': p}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    c = true_model.network.c
    perm = np.argsort(c)
    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    test_network_hypers = {'C': C, 'beta': 1.0/K, 'tau0': 0.5, 'tau1': 0.5}
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, B=B,
                                                            network_hypers=test_network_hypers)
    test_model.weight_model.initialize_from_gibbs(true_model.weight_model.A,
                                                  true_model.weight_model.W)

    # Plot the block probabilities
    plt.figure()
    im = plt.imshow(test_model.network.mf_m[perm,:],
                    interpolation="none", cmap="Greys",
                    aspect=float(C)/K)
    plt.xlabel('C')
    plt.ylabel('K')
    plt.show()
    plt.pause(0.001)

    # Run mean field updates for the SBM given a fixed network
    N_iters = 50
    c_samples = []
    vlbs = []
    for itr in range(N_iters):
        if itr % 5 == 0:
            print("Iteration: ", itr)

        # Update the plot
        im.set_data(test_model.network.mf_m[perm,:])
        plt.pause(0.001)


        # Resample from meanfield distribution
        test_model.network.resample_from_mf()
        c_samples.append(copy.deepcopy(test_model.network.c))
        vlbs.append(test_model.network.get_vlb() + test_model.weight_model.get_vlb())

        if itr > 0:
            if vlbs[-1] - vlbs[-2] < -1e-3:
                print("VLBs are not increasing")
                print(np.array(vlbs))
                # raise Exception("VLBs are not increasing!")


        # Take a mean field step
        test_model.network.meanfieldupdate(test_model.weight_model)

    plt.ioff()

    # Compute sample statistics for second half of samples
    c_samples = np.array(c_samples)
    vlbs = np.array(vlbs)

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_iters), amis, '-r')
    plt.plot(np.arange(N_iters), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")

    plt.figure()
    plt.plot(np.arange(N_iters), vlbs)
    plt.xlabel("Iteration")
    plt.ylabel("VLB")

    plt.show()
Example #15
def generate_synthetic_data(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    # Create a true model
    # Larger v (weight scale) implies smaller weights

    T_test = 1000

    # Debugging network:
    # C = 1
    # K = 4
    # T = 1000
    # dt = 1.0
    # B = 3
    # p = 0.5
    # kappa = 3.0
    # v = kappa * 5.0
    # c = np.zeros(K, dtype=np.int)

    # Small network:
    # Seed: 1957629166
    # C = 4
    # K = 20
    # T = 10000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.9 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (5.0 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network:
    # Seed: 2723361959
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network 2:
    # Seed = 3848328624
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 2.0
    # c = np.arange(C).repeat((K // C))
    # p = 0.4 * np.eye(C) + 0.01 * (1-np.eye(C))
    # v = kappa * (5 * np.eye(C) + 5.0 * (1-np.eye(C)))

    # Medium network, one cluster
    # Seed: 3848328624
    C = 1
    K = 50
    T = 100000
    dt = 1.0
    B = 3
    p = 0.08
    kappa = 3.0
    v = kappa * 5.0
    c = np.zeros(K, dtype=int)

    # Large network:
    # Seed = 2467634490
    # C = 5
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.4 * np.eye(C) + 0.025 * (1-np.eye(C))
    # v = kappa * (10 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Large network 2:
    # Seed =
    # C = 10
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Extra large network:
    # Seed: 2327447870
    # C = 20
    # K = 1000
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.25 * np.eye(C) + 0.0025 * (1-np.eye(C))
    # v = kappa * (15 * np.eye(C) + 30.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Create the model with these parameters
    network_hypers = {'C': C, 'kappa': kappa, 'c': c, 'p': p, 'v': v}

    # Create a simple network
    from pyhawkes.internals.network import ErdosRenyiFixedSparsity
    network = ErdosRenyiFixedSparsity(K, p, kappa, v=v)

    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K,
                                                            dt=dt,
                                                            B=B,
                                                            network=network)

    assert true_model.check_stability()

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A, true_model.weight_model.W)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T, keep=False, print_interval=50)

    # Pickle and save the data
    out_dir = os.path.join('data', "synthetic")
    out_name = 'synthetic_K%d_C%d_T%d.pkl.gz' % (K, C, T)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'w') as f:
        print("Saving output to ", out_path)
        pickle.dump((S, true_model), f, protocol=-1)

    # Sample test data
    S_test, _ = true_model.generate(T=T_test, keep=False)

    # Pickle and save the data
    out_dir = os.path.join('data', "synthetic")
    out_name = 'synthetic_test_K%d_C%d_T%d.pkl.gz' % (K, C, T_test)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'w') as f:
        print("Saving output to ", out_path)
        pickle.dump((S_test, true_model), f, protocol=-1)
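
The check_stability assertion above guards against an explosive process: a Hawkes network is stationary only if the spectral radius of the effective interaction matrix is below one. A minimal sketch of such a test, assuming the effective weights are exposed as W_effective as elsewhere in these examples:

import numpy as np

def is_stable(W_effective):
    # Spectral radius of the effective weight matrix must be < 1
    return np.max(np.abs(np.linalg.eigvals(W_effective))) < 1.0

# e.g. assert is_stable(true_model.weight_model.W_effective)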
def fit_network_hawkes_gibbs(S, K, C, B, dt, dt_max,
                             output_path,
                             standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="gibbs")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing Gibbs results
    # if os.path.exists(output_path + ".gibbs.pkl"):
    #     with open(output_path + ".gibbs.pkl", 'r') as f:
    #         print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
    #         (samples, timestamps) = cPickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 1000
        samples = []
        lps = []
        timestamps = [time.time()]
        for itr in range(N_samples):
            lps.append(test_model.log_probability())
            # lps.append(test_model.log_likelihood())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            if itr % 1 == 0:
                print "Iteration ", itr, "\t LL: ", lps[-1]
            #    im.set_data(test_model.weight_model.A * \
            #                test_model.weight_model.W)
            #    plt.pause(0.001)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1]-timestamps[0]), f, protocol=-1)

        # Save the Gibbs timestamps
        timestamps = np.array(timestamps)
        with open(output_path + ".gibbs.timestamps.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.timestamps.pkl")
            cPickle.dump(timestamps, f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps[1:] - timestamps[0]), f, protocol=-1)

    return samples, timestamps
def fit_network_hawkes_vb(S, K, C, B, dt, dt_max,
                           output_path,
                           standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="vb")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing Gibbs results
    # if os.path.exists(output_path + ".vb.pkl.gz"):
    #     with gzip.open(output_path + ".vb.pkl.gz", 'r') as f:
    #         print "Loading vb results from ", (output_path + ".vb.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    #
    #         if isinstance(timestamps, list):
    #             timestamps = np.array(timestamps)

    else:
        print "Fitting the data with a network Hawkes model using Batch VB"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        test_model.add_data(S)


        # Stochastic variational inference
        N_iters = 1000
        vlbs = []
        samples = []
        start = time.time()
        timestamps = []
        for itr in range(N_iters):
            vlbs.append(test_model.meanfield_coordinate_descent_step())
            print("Batch VB Iter: ", itr, "\tVLB: ", vlbs[-1])
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".vb.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1] - start), f, protocol=-1)

        # Save the VB samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".vb.pkl.gz", 'wb') as f:
            print("Saving VB samples to ", (output_path + ".vb.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
def fit_network_hawkes_svi(S, K, C, dt, dt_max,
                           output_path,
                           standard_model=None,
                           N_iters=500):


    # Check for existing Gibbs results
    # if os.path.exists(output_path + ".svi.pkl.gz"):
    #     with gzip.open(output_path + ".svi.pkl.gz", 'r') as f:
    #         print "Loading SVI results from ", (output_path + ".svi.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    if os.path.exists(output_path + ".svi.itr%04d.pkl" % (N_iters-1)):
            with open(output_path + ".svi.itr%04d.pkl" % (N_iters-1), 'r') as f:
                print "Loading SVI results from ", (output_path + ".svi.itr%04d.pkl" % (N_iters-1))
                sample = cPickle.load(f)
                samples = [sample]
                timestamps = None
                # (samples, timestamps) = cPickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using SVI"

        # Make a new model for inference
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/10.0,
                          'tau1': 1.0, 'tau0': 10.0,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max,
                                                                basis=test_basis,
                                                                network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Plot the block affiliations
        plt.figure(2)
        KC = np.zeros((K, C))
        KC[np.arange(K), test_model.network.c] = 1.0
        im_clus = plt.imshow(KC,
                             interpolation="none", cmap="Greys",
                             aspect=float(C)/K)

        # TODO: Add the data in minibatches
        minibatchsize = 1000
        test_model.add_data(S)


        # Stochastic variational inference
        samples = []
        delay = 1.0
        forgetting_rate = 0.5
        stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
        timestamps = []
        for itr in range(N_iters):
            print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
            test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
            test_model.resample_from_mf()
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                plt.figure(1)
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

                plt.figure(2)
                im_clus.set_data(test_model.network.mf_m)
                plt.title("Iteration %d" % itr)
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".svi.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the SVI samples
        # with gzip.open(output_path + ".svi.pkl.gz", 'wb') as f:
        #     print("Saving SVI samples to ", (output_path + ".svi.pkl.gz"))
        #     pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
def fit_ct_network_hawkes_gibbs(S,
                                K,
                                C,
                                dt,
                                dt_max,
                                output_path,
                                standard_model=None):

    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", 'rb') as f:
            print("Loading Gibbs results from ", (output_path + ".gibbs.pkl"))
            (samples, timestamps) = pickle.load(f)

    else:
        print("Fitting the data with a network Hawkes model using Gibbs sampling")

        S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

        # Set the network prior such that E[W] ~= 0.2
        # W ~ Gamma(kappa, v) for kappa = 10 => v = 50
        # v ~ Gamma(alpha, beta) for alpha = 5, beta = alpha / E_v
        E_W = 0.2
        kappa = 10.
        E_v = kappa / E_W
        alpha = 5.
        beta = alpha / E_v
        network_hypers = {
            'C': 1,
            "c": np.zeros(K).astype(np.int),
            "p": 0.25,
            "v": E_v,
            # 'kappa': kappa,
            # 'alpha': alpha, 'beta': beta,
            # 'p': 0.1,
            'allow_self_connections': False
        }

        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max,
                                             network_hypers=network_hypers)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.025)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in range(N_samples):
            if itr % 1 == 0:
                print("Iteration ", itr, "\tLL: ", lps[-1])
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            print(test_model.network.p)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
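
convert_discrete_to_continuous is called above but not shown in this listing. Below is a plausible sketch of what such a helper does; placing event times uniformly at random within their bins is an assumption, not the original implementation.

import numpy as np

def convert_discrete_to_continuous(S, dt):
    # Sketch: expand a (T, K) spike-count matrix into sorted event times
    # and process indices for a continuous-time model.
    bins, procs = np.nonzero(S)
    counts = S[bins, procs].astype(int)
    bins = np.repeat(bins, counts)           # one entry per event
    procs = np.repeat(procs, counts)
    times = (bins + np.random.rand(bins.size)) * dt
    order = np.argsort(times)
    return times[order], procs[order], S.shape[0] * dt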
def demo(seed=None):
    """
    Suppose we have a very long recording such that computing gradients of
    the log likelihood is quite expensive. Here we explore the use of
    stochastic gradient descent to fit the standard Hawkes model, which has
    a convex log likelihood. We first initialize the parameters using BFGS
    on a manageable subset of the data. Then we use SGD to refine the parameters
    on the entire dataset.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 1  # Number of clusters in the true data
    K = 10  # Number of nodes
    T = 10000  # Number of time bins to simulate
    dt = 1.0  # Time bin size
    B = 3  # Number of basis functions

    # Sample from the network Hawkes model
    S, R, true_model = sample_from_network_hawkes(C, K, T, dt, B)

    # Make a model to initialize the parameters
    init_len = 256
    init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    init_model.add_data(S[:init_len, :])

    print "Initializing with BFGS on first ", init_len, " time bins."
    init_model.fit_with_bfgs()

    # Make another model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    # Initialize with the BFGS parameters
    test_model.weights = init_model.weights
    # Add the data in minibatches
    test_model.add_data(S, minibatchsize=256)

    # Plot the true and inferred firing rate
    kplt = 0
    plt.figure()
    plt.plot(np.arange(256), R[:256, kplt], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(256),
                  test_model.compute_rate(ks=kplt)[:256], '-r')[0]
    plt.show()

    # Gradient descent
    N_steps = 10000
    lls = []
    learning_rate = 0.01 * np.ones(N_steps)
    momentum = 0.8 * np.ones(N_steps)
    prev_velocity = None
    for itr in range(N_steps):
        W, ll, prev_velocity = test_model.sgd_step(prev_velocity,
                                                   learning_rate[itr],
                                                   momentum[itr])
        lls.append(ll)

        # Update plot
        if itr % 5 == 0:
            ln.set_data(np.arange(256), test_model.compute_rate(ks=kplt))
            plt.title("Iteration %d" % itr)
            plt.pause(0.001)

    plt.ioff()

    print "W true:        ", true_model.weight_model.A * true_model.weight_model.W
    print "lambda0 true:  ", true_model.bias_model.lambda0
    print ""
    print "W test:        ", test_model.W
    print "lambda0 test   ", test_model.bias

    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K, K)), test_model.W)
    plt.show()
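
The demo above condenses to a generic warm-start pattern: fit the convex standard model cheaply on a short prefix with BFGS, then copy its weights into a model that sees the full recording in minibatches. A sketch distilled from the code above (the prefix length 256 is the demo's own choice):

# Warm-start pattern distilled from the demo above
init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
init_model.add_data(S[:256, :])            # cheap convex fit on a prefix
init_model.fit_with_bfgs()

sgd_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
sgd_model.weights = init_model.weights     # copy the BFGS solution
sgd_model.add_data(S, minibatchsize=256)   # refine with SGD on all data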
def generate_synthetic_data(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    # Create a true model
    # Larger v (weight scale) implies smaller weights

    T_test = 1000

    # Debugging network:
    # C = 1
    # K = 4
    # T = 1000
    # dt = 1.0
    # B = 3
    # p = 0.5
    # kappa = 3.0
    # v = kappa * 5.0
    # c = np.zeros(K, dtype=np.int)

    # Small network:
    # Seed: 1957629166
    # C = 4
    # K = 20
    # T = 10000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.9 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (5.0 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network:
    # Seed: 2723361959
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network 2:
    # Seed = 3848328624
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 2.0
    # c = np.arange(C).repeat((K // C))
    # p = 0.4 * np.eye(C) + 0.01 * (1-np.eye(C))
    # v = kappa * (5 * np.eye(C) + 5.0 * (1-np.eye(C)))

    # Medium network, one cluster
    # Seed: 3848328624
    C = 1
    K = 50
    T = 100000
    dt = 1.0
    B = 3
    p = 0.08
    kappa = 3.0
    v = kappa * 5.0
    c = np.zeros(K, dtype=int)

    # Large network:
    # Seed = 2467634490
    # C = 5
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.4 * np.eye(C) + 0.025 * (1-np.eye(C))
    # v = kappa * (10 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Large network 2:
    # Seed =
    # C = 10
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Extra large network:
    # Seed: 2327447870
    # C = 20
    # K = 1000
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.25 * np.eye(C) + 0.0025 * (1-np.eye(C))
    # v = kappa * (15 * np.eye(C) + 30.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))


    # Create the model with these parameters
    network_hypers = {'C': C, 'kappa': kappa, 'c': c, 'p': p, 'v': v}

    # Create a simple network
    from pyhawkes.internals.network import ErdosRenyiFixedSparsity
    network = ErdosRenyiFixedSparsity(K, p, kappa, v=v)

    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network=network)

    assert true_model.check_stability()

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T, keep=False, print_interval=50)

    # Pickle and save the data
    out_dir = os.path.join('data', "synthetic")
    out_name = 'synthetic_K%d_C%d_T%d.pkl.gz' % (K, C, T)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'wb') as f:
        print("Saving output to ", out_path)
        pickle.dump((S, true_model), f, protocol=-1)

    # Sample test data
    S_test, _ = true_model.generate(T=T_test, keep=False)

    # Pickle and save the data
    out_dir = os.path.join('data', "synthetic")
    out_name = 'synthetic_test_K%d_C%d_T%d.pkl.gz' % (K, C, T_test)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'wb') as f:
        print("Saving output to ", out_path)
        pickle.dump((S_test, true_model), f, protocol=-1)
Example #22
def test_gibbs_sbm(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 10
    K = 100
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0 / K}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network_hypers=network_hypers)
    # S,R = true_model.generate(T=T)
    c = true_model.network.c
    perm = np.argsort(c)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    network_hypers = {'C': C, 'beta': 1.0 / K}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network_hypers=network_hypers)
    # test_model.add_data(S)

    # Gibbs sample
    N_samples = 10
    samples = []
    lps = []
    for itr in range(N_samples):
        if itr % 5 == 0:
            print("Iteration: ", itr)
        samples.append(copy.deepcopy(test_model.get_parameters()))

        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample(
            (true_model.weight_model.A, true_model.weight_model.W))

    plt.ioff()

    # Compute sample statistics for second half of samples
    c_samples = np.array([c for _, _, _, _, c, _, _, _ in samples])

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
def fit_network_hawkes_gibbs(S,
                             K,
                             C,
                             B,
                             dt,
                             dt_max,
                             output_path,
                             standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="gibbs")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing Gibbs results
    # if os.path.exists(output_path + ".gibbs.pkl"):
    #     with open(output_path + ".gibbs.pkl", 'r') as f:
    #         print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
    #         (samples, timestamps) = cPickle.load(f)

    else:
        print(
            "Fitting the data with a network Hawkes model using Gibbs sampling"
        )

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0 / 20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
            K=K, dt=dt, dt_max=dt_max, B=B, network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 1000
        samples = []
        lps = []
        timestamps = [time.time()]
        for itr in range(N_samples):
            lps.append(test_model.log_probability())
            # lps.append(test_model.log_likelihood())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            if itr % 1 == 0:
                print("Iteration ", itr, "\t LL: ", lps[-1])
            #    im.set_data(test_model.weight_model.A * \
            #                test_model.weight_model.W)
            #    plt.pause(0.001)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                pickle.dump((samples[-1], timestamps[-1] - timestamps[0]),
                            f,
                            protocol=-1)

        # Save the Gibbs timestamps
        timestamps = np.array(timestamps)
        with open(output_path + ".gibbs.timestamps.pkl", 'w') as f:
            print("Saving Gibbs samples to ",
                  (output_path + ".gibbs.timestamps.pkl"))
            pickle.dump(timestamps, f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print("Saving Gibbs samples to ", (output_path + ".gibbs.pkl"))
            pickle.dump((samples, timestamps[1:] - timestamps[0]),
                        f,
                        protocol=-1)

    return samples, timestamps
def fit_network_hawkes_vb(S,
                          K,
                          C,
                          B,
                          dt,
                          dt_max,
                          output_path,
                          standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="vb")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps

    # # Check for existing Gibbs results
    # if os.path.exists(output_path + ".vb.pkl.gz"):
    #     with gzip.open(output_path + ".vb.pkl.gz", 'r') as f:
    #         print "Loading vb results from ", (output_path + ".vb.pkl.gz")
    #         (samples, timestamps) = cPickle.load(f)
    #
    #         if isinstance(timestamps, list):
    #             timestamps = np.array(timestamps)

    else:
        print("Fitting the data with a network Hawkes model using Batch VB")

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0 / 20.0}
        test_model = DiscreteTimeNetworkHawkesModelGammaMixture(
            K=K, dt=dt, dt_max=dt_max, B=B, network_hypers=network_hypers)
        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A,
                          test_model.weight_model.W,
                          vmax=0.5)
        plt.pause(0.001)

        # TODO: Add the data in minibatches
        minibatchsize = 500
        test_model.add_data(S)

        # Stochastic variational inference
        N_iters = 1000
        vlbs = []
        samples = []
        start = time.time()
        timestamps = []
        for itr in range(N_iters):
            vlbs.append(test_model.meanfield_coordinate_descent_step())
            print("Batch VB Iter: ", itr, "\tVLB: ", vlbs[-1])
            samples.append(test_model.copy_sample())
            timestamps.append(time.time())

            if itr % 1 == 0:
                im.set_data(test_model.weight_model.expected_W())
                plt.pause(0.001)

            # Save this sample
            with open(output_path + ".vb.itr%04d.pkl" % itr, 'w') as f:
                pickle.dump((samples[-1], timestamps[-1] - start),
                            f,
                            protocol=-1)

        # Save the VB samples
        timestamps = np.array(timestamps)
        with gzip.open(output_path + ".vb.pkl.gz", 'w') as f:
            print("Saving VB samples to ", (output_path + ".vb.pkl.gz"))
            pickle.dump((samples, timestamps - start), f, protocol=-1)

    return samples, timestamps
Example #25
def demo(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 1
    K = 10
    T = 1000
    dt = 1.0
    B = 3

    # Create a true model
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1 - np.eye(C))
    # m = 0.5 * np.ones(C)
    c = (0.0 * (np.arange(K) < 10) + 1.0 * (np.arange(K) >= 10)).astype(int)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C,
                                                            K=K,
                                                            dt=dt,
                                                            B=B,
                                                            c=c,
                                                            p=p,
                                                            v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T)

    # Make a new model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    test_model.add_data(S)

    # Plot the true and inferred firing rate
    kplt = 0
    plt.figure()
    plt.plot(np.arange(T), R[:, kplt], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate(ks=kplt), '-r')[0]
    plt.show()

    # Gradient descent
    N_steps = 10000
    lls = []
    for itr in range(N_steps):
        W, ll, grad = test_model.gradient_descent_step(stepsz=0.001)
        lls.append(ll)

        # Update plot
        if itr % 5 == 0:
            ln.set_data(np.arange(T), test_model.compute_rate(ks=kplt))
            plt.title("Iteration %d" % itr)
            plt.pause(0.001)

    plt.ioff()

    print "W true:        ", true_model.weight_model.A * true_model.weight_model.W
    print "lambda0 true:  ", true_model.bias_model.lambda0
    print "ll true:       ", true_model.log_likelihood()
    print ""
    print "W test:        ", test_model.W
    print "lambda0 test   ", test_model.bias
    print "ll test:       ", test_model.log_likelihood()

    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K, K)), test_model.W, vmax=0.5)
    plt.show()
Example #26
def demo(seed=None):
    """
    Suppose we have a very long recording such that computing gradients of
    the log likelihood is quite expensive. Here we explore the use of
    stochastic gradient descent to fit the standard Hawkes model, which has
    a convex log likelihood. We first initialize the parameters using BFGS
    on a manageable subset of the data. Then we use SGD to refine the parameters
    on the entire dataset.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 1       # Number of clusters in the true data
    K = 10      # Number of nodes
    T = 10000   # Number of time bins to simulate
    dt = 1.0    # Time bin size
    B = 3       # Number of basis functions

    # Sample from the network Hawkes model
    S, R, true_model = sample_from_network_hawkes(C, K, T, dt, B)

    # Make a model to initialize the parameters
    init_len = 256
    init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    init_model.add_data(S[:init_len, :])

    print "Initializing with BFGS on first ", init_len, " time bins."
    init_model.fit_with_bfgs()

    # Make another model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    # Initialize with the BFGS parameters
    test_model.weights = init_model.weights
    # Add the data in minibatches
    test_model.add_data(S, minibatchsize=256)

    # Plot the true and inferred firing rate
    kplt = 0
    plt.figure()
    plt.plot(np.arange(256), R[:256, kplt], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(256), test_model.compute_rate(ks=kplt)[:256], '-r')[0]
    plt.show()

    # Gradient descent
    N_steps = 10000
    lls = []
    learning_rate = 0.01 * np.ones(N_steps)
    momentum = 0.8 * np.ones(N_steps)
    prev_velocity = None
    for itr in range(N_steps):
        W, ll, prev_velocity = test_model.sgd_step(prev_velocity, learning_rate[itr], momentum[itr])
        lls.append(ll)

        # Update plot
        if itr % 5 == 0:
            ln.set_data(np.arange(256), test_model.compute_rate(ks=kplt)[:256])
            plt.title("Iteration %d" % itr)
            plt.pause(0.001)

    plt.ioff()

    print "W true:        ", true_model.weight_model.A * true_model.weight_model.W
    print "lambda0 true:  ", true_model.bias_model.lambda0
    print ""
    print "W test:        ", test_model.W
    print "lambda0 test   ", test_model.bias

    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K, K)), test_model.W)
    plt.show()
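The sgd_step call in the loop above takes the previous velocity and returns an updated one, which suggests classical momentum. Below is a minimal sketch of one such update on a flat weight vector, assuming the minibatch gradient has already been computed; nothing here is the pyhawkes source.

import numpy as np

def sgd_momentum_step(w, velocity, grad, learning_rate, momentum):
    """One SGD step with classical momentum (illustrative sketch)."""
    if velocity is None:
        # First iteration: start the velocity at zero, matching the
        # prev_velocity = None convention used in the demo above.
        velocity = np.zeros_like(w)
    # Decay the running velocity, push it along the negative gradient,
    # then move the weights by the resulting velocity.
    velocity = momentum * velocity - learning_rate * grad
    return w + velocity, velocity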
def fit_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path, standard_model=None):

    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", "rb") as f:
            print("Loading Gibbs results from", output_path + ".gibbs.pkl")
            samples, timestamps = pickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        # test_model = DiscreteTimeNetworkHawkesModelGammaMixture(C=C, K=K, dt=dt, dt_max=dt_max, B=B,
        #                                                         alpha=1.0, beta=1.0/20.0)
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)

        # Set the network prior such that E[W] ~= 0.01.
        # W ~ Gamma(kappa, v) has E[W | v] = kappa / v, so kappa = 10 requires E[v] = 1000,
        # and v ~ Gamma(alpha, beta) has E[v] = alpha / beta, so beta = alpha / 1000 = 0.01.
        # (See the sanity check that follows this function.)
        E_W = 0.01
        kappa = 10.0
        E_v = kappa / E_W
        alpha = 10.0
        beta = alpha / E_v
        network_hypers = {
            "C": 2,
            "kappa": kappa,
            "alpha": alpha,
            "beta": beta,
            "p": 0.8,
            "allow_self_connections": False,
        }
        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=K, dt=dt, dt_max=dt_max, basis=test_basis, network_hypers=network_hypers
        )
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in range(N_samples):
            if itr % 1 == 0:
                print("Iteration ", itr, "\tLP: ", lps[-1])
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, "w") as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", "w") as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
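The prior hyperparameters above are easy to sanity-check. Assuming the standard rate parameterizations W | v ~ Gamma(kappa, v) with E[W | v] = kappa / v and v ~ Gamma(alpha, beta) with E[v] = alpha / beta, the chosen constants put the prior weight scale at the intended 0.01:

E_W_target = 0.01
kappa = 10.0
E_v = kappa / E_W_target       # 1000.0: a large rate v keeps the weights small
alpha = 10.0
beta = alpha / E_v             # 0.01, so E[v] = alpha / beta = 1000.0
assert abs(kappa / (alpha / beta) - E_W_target) < 1e-12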
def fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path, standard_model=None):

    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", "rb") as f:
            print("Loading Gibbs results from", output_path + ".gibbs.pkl")
            samples, timestamps = pickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

        # Set the network prior such that E[W] ~= 0.2.
        # W ~ Gamma(kappa, v) has E[W | v] = kappa / v, so kappa = 10 requires E[v] = 50,
        # and v ~ Gamma(alpha, beta) has E[v] = alpha / beta, so beta = alpha / 50 = 0.1.
        E_W = 0.2
        kappa = 10.0
        E_v = kappa / E_W
        alpha = 5.0
        beta = alpha / E_v
        network_hypers = {
            "C": 1,
            "c": np.zeros(K).astype(np.int),
            "p": 0.25,
            "v": E_v,
            # 'kappa': kappa,
            # 'alpha': alpha, 'beta': beta,
            # 'p': 0.1,
            "allow_self_connections": False,
        }

        test_model = ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, network_hypers=network_hypers)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.025)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in range(N_samples):
            if itr % 1 == 0:
                print("Iteration ", itr, "\tLP: ", lps[-1])
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.time())

            print(test_model.network.p)

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, "w") as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", "w") as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
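convert_discrete_to_continuous is called above without being shown. A plausible minimal sketch, assuming each count in bin t of process k becomes that many event times scattered uniformly within the bin; the actual helper may place events differently:

import numpy as np

def convert_discrete_to_continuous(S, dt):
    """Turn (T, K) binned counts into (event times, process ids, duration)."""
    T, K = S.shape
    times, procs = [], []
    for k in range(K):
        for t in range(T):
            n = int(S[t, k])
            if n > 0:
                # Scatter the n events uniformly inside the bin [t*dt, (t+1)*dt)
                times.extend(t * dt + dt * np.random.rand(n))
                procs.extend([k] * n)
    order = np.argsort(times)
    return np.asarray(times)[order], np.asarray(procs, dtype=int)[order], T * dt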
Example #29
0
def test_gibbs_sbm(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 10
    K = 100
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # S,R = true_model.generate(T=T)
    c = true_model.network.c
    perm = np.argsort(c)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)


    # Make a new model for inference
    network_hypers = {'C': C, 'beta': 1.0/K}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # test_model.add_data(S)

    # Gibbs sample
    N_samples = 10
    samples = []
    lps = []
    for itr in range(N_samples):
        if itr % 5 == 0:
            print("Iteration: ", itr)
        samples.append(copy.deepcopy(test_model.get_parameters()))

        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample((true_model.weight_model.A,
                                     true_model.weight_model.W))

    plt.ioff()

    # Extract the sampled block assignments from each parameter tuple
    c_samples = np.array([c for _, _, _, _, c, _, _, _ in samples])

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
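Adjusted mutual information and the adjusted Rand index are the right scores here because block labels are only identified up to permutation: both return 1.0 for identical partitions however the clusters are numbered, and values near 0 for random labelings. For example:

from sklearn.metrics import adjusted_mutual_info_score, adjusted_rand_score

true_c = [0, 0, 1, 1]
perm_c = [1, 1, 0, 0]   # the same partition with the labels swapped
print(adjusted_rand_score(true_c, perm_c))          # 1.0
print(adjusted_mutual_info_score(true_c, perm_c))   # 1.0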