def fit_network_hawkes_gibbs(S, K, C, dt, dt_max,
                             output_path,
                             standard_model=None):
    """
    Fit a spike-and-slab network Hawkes model to ``S`` by Gibbs sampling.

    Results are cached on disk.  If ``output_path + ".gibbs.pkl"`` exists,
    the pickled ``(samples, timestamps)`` pair is loaded from it and no
    fitting is done.  Otherwise 100 Gibbs sweeps are run; the sample from
    each sweep is written to ``output_path + ".gibbs.itr%04d.pkl"`` and the
    complete result is written to ``output_path + ".gibbs.pkl"``.

    While sampling, a network plot is opened in interactive mode and
    refreshed on every sweep.

    :param S: data handed to ``test_model.add_data``
        (presumably a time-bins x K count array -- TODO confirm).
    :param K: forwarded to the model constructor as ``K``.
    :param C: stored under ``'C'`` in the network hyperparameters.
    :param dt: forwarded to the basis and the model as ``dt``.
    :param dt_max: forwarded to the basis and the model as ``dt_max``.
    :param output_path: path prefix of the pickle files read and written.
    :param standard_model: optional model passed to
        ``initialize_with_standard_model`` before sampling starts.
    :return: ``(samples, timestamps)`` -- the objects returned by
        ``resample_and_copy`` and the ``time.clock()`` reading taken after
        each sweep.
    """
    results_path = output_path + ".gibbs.pkl"

    # Check for existing Gibbs results
    if os.path.exists(results_path):
        # The results are pickled with protocol=-1 (a binary protocol), so
        # the file has to be opened in binary mode.
        with open(results_path, 'rb') as f:
            print(" ".join(("Loading Gibbs results from ", results_path)))
            (samples, timestamps) = cPickle.load(f)
        return samples, timestamps

    print("Fitting the data with a network Hawkes model using Gibbs sampling")

    # Make a new model for inference
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
    network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/10.0,
                      'tau1': 1.0, 'tau0': 10.0,
                      'allow_self_connections': False}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max,
                                                            basis=test_basis,
                                                            network_hypers=network_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Open a live plot of the network; it is refreshed inside the loop.
    plt.ion()
    im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
    plt.pause(0.001)

    # Gibbs sample
    N_samples = 100
    samples = []
    lps = [test_model.log_probability()]
    timestamps = []
    for itr in xrange(N_samples):
        # Report the most recent log probability and redraw the plot.
        print(" ".join(str(part) for part in
                       ("Iteration ", itr, "\tLL: ", lps[-1])))
        im.set_data(test_model.weight_model.W_effective)
        plt.pause(0.001)

        lps.append(test_model.log_probability())
        samples.append(test_model.resample_and_copy())
        timestamps.append(time.clock())

        # Save this sample so a partial run still leaves results on disk.
        with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'wb') as f:
            cPickle.dump(samples[-1], f, protocol=-1)

    # Save the Gibbs samples
    with open(results_path, 'wb') as f:
        print(" ".join(("Saving Gibbs samples to ", results_path)))
        cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
# Example #2
# 0
def demo(seed=None):
    """
    Sample from a sparse network Hawkes model, fit a standard discrete-time
    Hawkes model to the sample with BFGS, then print and plot the comparison.

    The example is currently disabled: the function raises on entry, so
    nothing after the ``raise`` statement is executed.

    :param seed: seed for ``np.random``; drawn at random when ``None``.
    :return: nothing
    :raises NotImplementedError: always, until the example is updated.
    """
    raise NotImplementedError("This example needs to be updated.")

    # ---- Unreachable until the example is updated -------------------------
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    n_clusters = 1  # Number of clusters in the true data
    n_nodes = 10  # Number of nodes
    n_bins = 1000  # Number of time bins to simulate
    dt = 0.02  # Time bin size
    dt_max = 0.08
    n_basis = 3  # Number of basis functions

    # Sample from a sparse network Hawkes model
    S, true_model = sample_from_network_hawkes(n_clusters, n_nodes, n_bins,
                                               dt, dt_max, n_basis)

    # Make a new model for inference
    fit_basis = IdentityBasis(dt, dt_max, allow_instantaneous=False)
    fit_model = DiscreteTimeStandardHawkesModel(
        K=n_nodes,
        dt=dt,
        dt_max=dt_max + dt,
        beta=1.0,
        basis=fit_basis,
        allow_self_connections=True,
    )
    fit_model.add_data(S)

    # DEBUG: Initialize with the true parameters of the network Hawkes model
    # fit_model.initialize_with_gibbs_model(true_model)

    fit_model.fit_with_bfgs()

    # Print the sampled ("true") parameters next to the fitted ones.
    true_weights = true_model.weight_model.A * true_model.weight_model.W
    print("W true:        ", true_weights)
    print("lambda0 true:  ", true_model.bias_model.lambda0)
    print("ll true:       ", true_model.log_likelihood())
    print("")
    print("W test:        ", fit_model.W)
    print("lambda0 test   ", fit_model.bias)
    print("ll test:       ", fit_model.log_likelihood())

    plot_network(np.ones((n_nodes, n_nodes)), fit_model.W, vmax=0.5)

    # Plot the rates of the first three processes, one subplot each:
    # true model in blue, fitted model in red.
    n_shown = 3
    plt.figure()
    for row, proc in enumerate(range(n_shown), start=1):
        plt.subplot(n_shown, 1, row)
        plt.plot(np.arange(n_bins) * dt, true_model.compute_rate(proc=proc), '-b')
        plt.plot(np.arange(n_bins) * dt, fit_model.compute_rate(ks=proc), '-r')

    plt.ioff()
    plt.show()
def fit_standard_hawkes_model_bfgs_noxv(S,
                                        K,
                                        dt,
                                        dt_max,
                                        output_path,
                                        W_max=None):
    """
    Fit a standard Hawkes model to all of ``S`` with BFGS (no cross-validation).

    Results are cached on disk.  If ``output_path + ".bfgs.pkl"`` exists, the
    pickled ``(init_model, init_time)`` pair is loaded from it and no fitting
    is done.  Otherwise a model is built, fit with ``fit_with_bfgs`` and the
    pair is written to that file.

    :param S: data handed to ``init_model.add_data``
        (presumably a time-bins x K count array -- TODO confirm).
    :param K: forwarded to the model constructor as ``K``.
    :param dt: forwarded to the basis and the model as ``dt``.
    :param dt_max: forwarded to the basis and the model as ``dt_max``.
    :param output_path: path prefix of the pickle file read and written.
    :param W_max: forwarded to the model constructor as ``W_max``.
    :return: ``(init_model, init_time)`` -- the fitted model and the
        ``time.clock()`` difference measured around ``fit_with_bfgs``.
    """
    results_path = output_path + ".bfgs.pkl"

    # Check for existing results
    if os.path.exists(results_path):
        print("Existing BFGS results found. Loading from file.")
        # The results are pickled with protocol=-1 (a binary protocol), so
        # the file has to be opened in binary mode.
        with open(results_path, 'rb') as f:
            init_model, init_time = cPickle.load(f)
        return init_model, init_time

    print("Fitting the data with a standard Hawkes model")
    # We want the max W ~ -.025 and the mean to be around 0.01
    # W ~ Gamma(alpha, beta) => E[W] = alpha/beta, so beta ~100 * alpha
    alpha = 1.1
    beta = alpha * 1.0 / 0.01

    # Make a model to initialize the parameters
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
    init_model = DiscreteTimeStandardHawkesModel(
        K=K,
        dt=dt,
        dt_max=dt_max,
        alpha=alpha,
        beta=beta,
        basis=test_basis,
        allow_self_connections=False,
        W_max=W_max)
    init_model.add_data(S)

    # Initialize the background rates to their mean
    init_model.initialize_to_background_rate()

    # Time only the optimisation itself.
    start = time.clock()
    init_model.fit_with_bfgs()
    init_time = time.clock() - start

    # Save the fitted model together with its fitting time.
    with open(results_path, 'wb') as f:
        print(" ".join(("Saving BFGS results to ", results_path)))
        cPickle.dump((init_model, init_time), f, protocol=-1)

    return init_model, init_time
def fit_network_hawkes_svi(S,
                           K,
                           C,
                           dt,
                           dt_max,
                           output_path,
                           standard_model=None,
                           N_iters=500,
                           true_network=None):
    """
    Fit a Gamma-mixture SBM network Hawkes model to ``S`` with stochastic
    variational inference (SVI).

    Results are cached on disk and looked up in this order:

    1. ``output_path + ".svi.pkl.gz"`` -- the gzipped, pickled
       ``(samples, timestamps)`` pair of a complete run.
    2. ``output_path + ".svi.itr%04d.pkl" % (N_iters - 1)`` -- the sample of
       the last iteration only; it is returned as a one-element list and
       ``timestamps`` is ``None``.
    3. Otherwise ``N_iters`` SVI steps are run, writing one ``.svi.itrNNNN.pkl``
       file per iteration and the ``.svi.pkl.gz`` archive at the end.

    :param S: data handed to ``test_model.add_data``
        (presumably a time-bins x K count array -- TODO confirm).
    :param K: forwarded to the model constructor as ``K``.
    :param C: currently unused; the network hyperparameters hard-code
        ``'C': 2``.
    :param dt: forwarded to the basis and the model as ``dt``.
    :param dt_max: forwarded to the basis and the model as ``dt_max``.
    :param output_path: path prefix of the pickle files read and written.
    :param standard_model: optional model passed to
        ``initialize_with_standard_model`` before inference starts.
    :param N_iters: number of SVI iterations.
    :param true_network: optional reference network; when given, the
        ``roc_auc_score`` of ``expected_W()`` against it is printed before
        every iteration.
    :return: ``(samples, timestamps)`` -- the objects returned by
        ``copy_sample`` and the ``time.clock()`` reading taken after each
        iteration (``None`` in case 2 above).
    """
    archive_path = output_path + ".svi.pkl.gz"
    last_itr_path = output_path + ".svi.itr%04d.pkl" % (N_iters - 1)

    # Check for existing SVI results
    if os.path.exists(archive_path):
        with gzip.open(archive_path, 'r') as f:
            print(" ".join(("Loading SVI results from ", archive_path)))
            (samples, timestamps) = cPickle.load(f)
        return samples, timestamps

    if os.path.exists(last_itr_path):
        # Per-iteration files are pickled with protocol=-1 (a binary
        # protocol), so they have to be opened in binary mode.
        with open(last_itr_path, 'rb') as f:
            print(" ".join(("Loading SVI results from ", last_itr_path)))
            sample = cPickle.load(f)
        # Only the final sample is available on this path.
        return [sample], None

    print("Fitting the data with a network Hawkes model using SVI")

    # Make a new model for inference
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
    E_W = 0.01
    kappa = 10.
    E_v = kappa / E_W
    alpha = 10.
    beta = alpha / E_v

    # NOTE(review): 'C' is fixed at 2 here and the C argument is ignored --
    # confirm whether that is intended.
    network_hypers = {
        'C': 2,
        'kappa': kappa,
        'alpha': alpha,
        'beta': beta,
        'p': 0.8,
        'allow_self_connections': False
    }
    test_model = DiscreteTimeNetworkHawkesModelGammaMixtureSBM(
        K=K,
        dt=dt,
        dt_max=dt_max,
        basis=test_basis,
        network_hypers=network_hypers)
    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # TODO: Add the data in minibatches
    minibatchsize = 3000
    test_model.add_data(S)

    # Stochastic variational inference
    samples = []
    delay = 10.0
    forgetting_rate = 0.5
    # Step size for iteration i is (i + delay) ** -forgetting_rate.
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    timestamps = []
    for itr in xrange(N_iters):
        if true_network is not None:
            W_score = test_model.weight_model.expected_W()
            auc = roc_auc_score(true_network.ravel(), W_score.ravel())
            print(" ".join(str(part) for part in ("AUC: ", auc)))

        print(" ".join(str(part) for part in
                       ("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])))
        test_model.sgd_step(minibatchsize=minibatchsize,
                            stepsize=stepsize[itr])
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())
        timestamps.append(time.clock())

        # Save this sample so a partial run still leaves results on disk.
        with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
            cPickle.dump(samples[-1], f, protocol=-1)

    # Save all SVI samples in one gzipped archive
    with gzip.open(archive_path, 'w') as f:
        print(" ".join(("Saving SVI samples to ", archive_path)))
        cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
def fit_standard_hawkes_model_bfgs(S, K, dt, dt_max, output_path, W_max=None):
    """
    Fit a standard Hawkes model with BFGS, picking ``beta`` by held-out
    log likelihood.

    Results are cached on disk.  If ``output_path + ".bfgs.pkl"`` exists, the
    pickled ``(init_model, init_time)`` pair is loaded from it and no fitting
    is done.  Otherwise the model is fit on ``S[:init_len]`` once per
    candidate ``beta``, each fit is scored on the held-out slice of ``S``,
    and the best-scoring copy is saved and returned.

    :param S: data array; it is sliced along its first axis into a fitting
        part and a held-out part.
    :param K: forwarded to the model constructor as ``K``.
    :param dt: forwarded to the basis and the model as ``dt``.
    :param dt_max: forwarded to the basis and the model as ``dt_max``.
    :param output_path: path prefix of the pickle file read and written.
    :param W_max: forwarded to the model constructor as ``W_max``.
    :return: ``(init_model, init_time)`` -- the selected model copy and the
        ``time.clock()`` difference measured around all the fits.
    """
    results_path = output_path + ".bfgs.pkl"

    # Check for existing results
    if os.path.exists(results_path):
        print("Existing BFGS results found. Loading from file.")
        # The results are pickled with protocol=-1 (a binary protocol), so
        # the file has to be opened in binary mode.
        with open(results_path, 'rb') as f:
            init_model, init_time = cPickle.load(f)
        return init_model, init_time

    print("Fitting the data with a standard Hawkes model")

    # We want the max W ~ -.025 and the mean to be around 0.01
    # W ~ Gamma(alpha, beta) => E[W] = alpha/beta, so beta ~100 * alpha
    alpha = 1.1
    betas = [alpha * 1.0 / 0.01]

    # Fit on everything except the last xv_len rows and hold those out.
    # NOTE: this assumes S has more than xv_len rows; otherwise init_len is
    # not positive and the two slices no longer split S that way.
    xv_len = 10000
    init_len = S.shape[0] - xv_len
    S_init = S[:init_len, :]
    S_xv = S[init_len:init_len + xv_len, :]

    init_models = []
    xv_ll = np.zeros(len(betas))

    # Make a model to initialize the parameters
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
    init_model = DiscreteTimeStandardHawkesModel(
        K=K,
        dt=dt,
        dt_max=dt_max,
        alpha=alpha,
        beta=0.0,
        basis=test_basis,
        allow_self_connections=False,
        W_max=W_max)
    init_model.add_data(S_init)
    # Initialize the background rates to their mean
    init_model.initialize_to_background_rate()

    start = time.clock()
    for i, beta in enumerate(betas):
        print(" ".join(str(part) for part in
                       ("Fitting with BFGS on first ", init_len, " time bins, ",
                        "beta = ", beta, "W_max = ", W_max)))
        # The same model object is refit for every beta; a copy of each
        # fit is kept so the best one can be selected afterwards.
        init_model.beta = beta
        init_model.fit_with_bfgs()
        init_models.append(init_model.copy_sample())

        # Compute the heldout likelihood on the xv data; a non-finite score
        # is mapped to -inf so that it can never win the argmax.
        xv_ll[i] = init_model.heldout_log_likelihood(S_xv)
        if not np.isfinite(xv_ll[i]):
            xv_ll[i] = -np.inf

    init_time = time.clock() - start

    # Take the best model
    print("XV predictive log likelihoods: ")
    for beta, ll in zip(betas, xv_ll):
        print("Beta: %.2f\tLL: %.2f" % (beta, ll))
    best_ind = np.argmax(xv_ll)
    print(" ".join(str(part) for part in ("Best beta: ", betas[best_ind])))
    init_model = init_models[best_ind]

    if best_ind == 0 or best_ind == len(betas) - 1:
        print("WARNING: Best BFGS model was for extreme value of beta. "
              "Consider expanding the beta range.")

    # Save the selected model copy together with the fitting time.
    with open(results_path, 'wb') as f:
        print(" ".join(("Saving BFGS results to ", results_path)))
        cPickle.dump((init_model, init_time), f, protocol=-1)

    return init_model, init_time
def fit_network_hawkes_svi(S, K, C, dt, dt_max,
                           output_path,
                           standard_model=None,
                           N_iters=500):
    """
    Fit a Gamma-mixture network Hawkes model to ``S`` with stochastic
    variational inference (SVI), showing live plots while it runs.

    If ``output_path + ".svi.itr%04d.pkl" % (N_iters - 1)`` exists, the sample
    of that last iteration is loaded and returned as a one-element list with
    ``timestamps`` set to ``None``.  Otherwise ``N_iters`` SVI steps are run
    and one ``.svi.itrNNNN.pkl`` file is written per iteration.  This variant
    neither reads nor writes a combined ``.svi.pkl.gz`` archive.

    Two figures are updated on every iteration: figure 1 with
    ``expected_W()`` and figure 2 with ``network.mf_m``.

    :param S: data handed to ``test_model.add_data``
        (presumably a time-bins x K count array -- TODO confirm).
    :param K: forwarded to the model constructor as ``K``; also the number
        of rows of the block-affiliation image.
    :param C: stored under ``'C'`` in the network hyperparameters; also the
        number of columns of the block-affiliation image.
    :param dt: forwarded to the basis and the model as ``dt``.
    :param dt_max: forwarded to the basis and the model as ``dt_max``.
    :param output_path: path prefix of the pickle files read and written.
    :param standard_model: optional model passed to
        ``initialize_with_standard_model`` before inference starts.
    :param N_iters: number of SVI iterations.
    :return: ``(samples, timestamps)`` -- the objects returned by
        ``copy_sample`` and the ``time.clock()`` reading taken after each
        iteration (``None`` when results were loaded from disk).
    """
    last_itr_path = output_path + ".svi.itr%04d.pkl" % (N_iters-1)

    # Check for existing SVI results
    if os.path.exists(last_itr_path):
        # Per-iteration files are pickled with protocol=-1 (a binary
        # protocol), so they have to be opened in binary mode.
        with open(last_itr_path, 'rb') as f:
            print(" ".join(("Loading SVI results from ", last_itr_path)))
            sample = cPickle.load(f)
        # Only the final sample is available on this path.
        return [sample], None

    print("Fitting the data with a network Hawkes model using SVI")

    # Make a new model for inference
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
    network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/10.0,
                      'tau1': 1.0, 'tau0': 10.0,
                      'allow_self_connections': False}
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, dt_max=dt_max,
                                                            basis=test_basis,
                                                            network_hypers=network_hypers)
    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Figure 1: the network, refreshed inside the loop.
    plt.ion()
    im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
    plt.pause(0.001)

    # Figure 2: the block affiliations as a K x C indicator image with a
    # 1.0 in column network.c[k] of row k.
    plt.figure(2)
    KC = np.zeros((K, C))
    KC[np.arange(K), test_model.network.c] = 1.0
    im_clus = plt.imshow(KC,
                         interpolation="none", cmap="Greys",
                         aspect=float(C)/K)

    # TODO: Add the data in minibatches
    minibatchsize = 1000
    test_model.add_data(S)

    # Stochastic variational inference
    samples = []
    delay = 1.0
    forgetting_rate = 0.5
    # Step size for iteration i is (i + delay) ** -forgetting_rate.
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    timestamps = []
    for itr in xrange(N_iters):
        print(" ".join(str(part) for part in
                       ("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])))
        test_model.sgd_step(minibatchsize=minibatchsize, stepsize=stepsize[itr])
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())
        timestamps.append(time.clock())

        # Refresh both figures on every iteration.
        plt.figure(1)
        im.set_data(test_model.weight_model.expected_W())
        plt.pause(0.001)

        plt.figure(2)
        im_clus.set_data(test_model.network.mf_m)
        plt.title("Iteration %d" % itr)
        plt.pause(0.001)

        # Save this sample so a partial run still leaves results on disk.
        with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
            cPickle.dump(samples[-1], f, protocol=-1)

    return samples, timestamps
def fit_network_hawkes_svi(S,
                           K,
                           C,
                           dt,
                           dt_max,
                           output_path,
                           standard_model=None,
                           N_iters=100,
                           true_network=None):
    """
    Fit a Gamma-mixture SBM network Hawkes model to ``S`` with stochastic
    variational inference (SVI).

    From Scott Linderman's experiments in https://github.com/slinderman/pyhawkes/tree/master/experiments

    Results are cached on disk and looked up in this order:

    1. ``output_path + ".svi.pkl.gz"`` -- the gzipped, pickled
       ``(samples, timestamps)`` pair of a complete run.
    2. ``output_path + ".svi.itr%04d.pkl" % (N_iters - 1)`` -- the sample of
       the last iteration only; it is returned as a one-element list and
       ``timestamps`` is ``None``.
    3. Otherwise ``N_iters`` SVI steps are run, writing one ``.svi.itrNNNN.pkl``
       file per iteration and the ``.svi.pkl.gz`` archive at the end.

    :param S: data handed to ``test_model.add_data``
        (presumably a time-bins x K count array -- TODO confirm).
    :param K: forwarded to the model constructor as ``K``.
    :param C: currently unused; the network hyperparameters hard-code
        ``'C': 2``.
    :param dt: forwarded to the basis and the model as ``dt``.
    :param dt_max: forwarded to the basis and the model as ``dt_max``.
    :param output_path: path prefix of the pickle files read and written.
    :param standard_model: optional model passed to
        ``initialize_with_standard_model`` before inference starts.
    :param N_iters: number of SVI iterations.
    :param true_network: currently unused by this function.
    :return: ``(samples, timestamps)`` -- the objects returned by
        ``copy_sample`` and one clock reading per iteration (``None`` in
        case 2 above).
    """
    archive_path = output_path + ".svi.pkl.gz"
    last_itr_path = output_path + ".svi.itr%04d.pkl" % (N_iters - 1)

    # Check for existing SVI results
    if os.path.exists(archive_path):
        with gzip.open(archive_path, 'r') as f:
            print("Loading SVI results from ", archive_path)
            (samples, timestamps) = pickle.load(f)
        return samples, timestamps

    if os.path.exists(last_itr_path):
        # pickle reads and writes bytes, so the file has to be opened in
        # binary mode.
        with open(last_itr_path, 'rb') as f:
            print("Loading SVI results from ", last_itr_path)
            sample = pickle.load(f)
        # Only the final sample is available on this path.
        return [sample], None

    print("Fitting the data with a network Hawkes model using SVI")

    #------------- Make a new model for inference
    test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
    E_W = 0.01
    kappa = 10.
    E_v = kappa / E_W
    alpha = 10.
    beta = alpha / E_v
    # NOTE(review): 'C' is fixed at 2 here and the C argument is ignored --
    # confirm whether that is intended.
    network_hypers = {
        'C': 2,
        'kappa': kappa,
        'alpha': alpha,
        'beta': beta,
        'p': 0.8,
        'allow_self_connections': False
    }
    test_model = DiscreteTimeNetworkHawkesModelGammaMixtureSBM(
        K=K,
        dt=dt,
        dt_max=dt_max,
        basis=test_basis,
        network_hypers=network_hypers)
    #------------- Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)
    minibatchsize = 3000
    test_model.add_data(S)

    #------------- Stochastic variational inference learning with default algorithm hyperparameters
    samples = []
    delay = 10.0
    forgetting_rate = 0.5
    # Step size for iteration i is (i + delay) ** -forgetting_rate.
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    timestamps = []
    # time.clock() was removed in Python 3.8; keep using it where it still
    # exists and fall back to time.perf_counter() elsewhere.
    clock = getattr(time, "clock", time.perf_counter)
    for itr in range(N_iters):

        print("SVI Iter: ", itr, "\tStepsize: ", stepsize[itr])
        test_model.sgd_step(minibatchsize=minibatchsize,
                            stepsize=stepsize[itr])
        test_model.resample_from_mf()
        samples.append(test_model.copy_sample())
        timestamps.append(clock())

        # Save this sample so a partial run still leaves results on disk.
        with open(output_path + ".svi.itr%04d.pkl" % itr, 'wb') as f:
            pickle.dump(samples[-1], f, protocol=-1)

    # Save all SVI samples in one gzipped archive
    with gzip.open(archive_path, 'w') as f:
        print("Saving SVI samples to ", archive_path)
        pickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps