Beispiel #1
0
def fit_ct_network_hawkes_gibbs(S,
                                S_test,
                                dt,
                                dt_max,
                                output_path,
                                model_args=None,
                                standard_model=None,
                                N_samples=100,
                                time_limit=8 * 60 * 60):
    """
    Fit a continuous time network Hawkes model with Gibbs sampling.

    If ``output_path`` already exists, the gzipped pickle stored there is
    loaded and returned immediately; neither ``S`` nor ``S_test`` is touched
    in that case. Otherwise the data are converted with
    ``convert_discrete_to_continuous``, the sampler is run, and the results
    are written to ``output_path`` before being returned.

    :param S: training data; ``S.shape[1]`` is passed to the model as ``K``
    :param S_test: held-out data, scored before sampling and after every
        iteration
    :param dt: second argument of ``convert_discrete_to_continuous``
    :param dt_max: forwarded to ``ContinuousTimeNetworkHawkesModel``
    :param output_path: path of the gzipped pickle used as cache / output
    :param model_args: optional dict of extra keyword arguments for the
        model constructor (``None`` means no extra arguments)
    :param standard_model: if given, used to initialize the model via
        ``initialize_with_standard_model``
    :param N_samples: maximum number of Gibbs iterations
    :param time_limit: stop once the summed ``resample_model`` durations
        (measured with ``time.time``) exceed this value
    :return: ``Results(samples, timestamps, lps, hlls)``. ``timestamps`` is
        the cumulative sum of the per-iteration durations (leading 0),
        ``hlls`` has one entry before sampling plus one per iteration, and
        ``lps`` holds only the initial log probability because the
        per-iteration update is disabled.
    """
    # Check for existing Gibbs results first so that a cached run does not
    # pay for (or depend on) the data conversion below.
    if os.path.exists(output_path):
        # NOTE: unpickling can execute arbitrary code; only point
        # output_path at files written by this function.
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from ", output_path)
            return pickle.load(f)

    print(
        "Fitting the data with a continuous time network Hawkes model using Gibbs sampling"
    )

    # A None default avoids sharing one dict object between calls.
    if model_args is None:
        model_args = {}

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    test_model = \
        ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Gibbs sample
    samples = []
    lps = [test_model.log_probability()]
    hlls = [
        test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)
    ]
    times = [0]
    for _ in progprint_xrange(N_samples, perline=25):
        # Time only the model update, not the bookkeeping that follows
        tic = time.time()
        test_model.resample_model()
        times.append(time.time() - tic)

        samples.append(copy.deepcopy(test_model.get_parameters()))

        # Only the heldout log likelihood is tracked per iteration
        hlls.append(
            test_model.heldout_log_likelihood(S_test_ct, C_test_ct,
                                              T_test))

        # Check if time limit has been exceeded
        if np.sum(times) > time_limit:
            break

    # Get cumulative timestamps
    timestamps = np.cumsum(times)
    lps = np.array(lps)
    hlls = np.array(hlls)

    # Make results object
    results = Results(samples, timestamps, lps, hlls)

    # Save the Gibbs samples
    with gzip.open(output_path, 'wb') as f:
        print("Saving Gibbs samples to ", output_path)
        pickle.dump(results, f, protocol=-1)

    return results
def _mean_field_predictive_ll(models, S_test, N_samples):
    """
    Log of the average predictive likelihood for each mean-field model.

    For every model, ``N_samples`` parameter draws are taken with
    ``resample_from_mf`` and scored on ``S_test``; the per-model result is
    ``logsumexp`` over the draws minus ``log(N_samples)``.

    :return: array with one entry per model (empty if ``models`` is empty)
    """
    if len(models) == 0:
        return np.zeros(0)

    # Preconvolve once with the first model's basis and reuse for all models
    F_test = models[0].basis.convolve_with_basis(S_test)

    sample_plls = np.zeros((len(models), N_samples))
    for i, model in enumerate(models):
        for j in range(N_samples):
            model.resample_from_mf()
            sample_plls[i, j] = model.heldout_log_likelihood(S_test,
                                                             F=F_test)

    return -np.log(N_samples) + logsumexp(sample_plls, axis=1)


def compute_predictive_ll(S_test,
                          S_train,
                          true_model=None,
                          bfgs_model=None,
                          sgd_models=None,
                          gibbs_samples=None,
                          vb_models=None,
                          svi_models=None,
                          dt=0.02):
    """
    Compute the predictive log likelihood of ``S_test`` under each model.

    :param S_test: held-out count array that is scored
    :param S_train: training count array; only used for the homogeneous
        baseline rate (column sums divided by the number of rows)
    :param true_model: optional model scored with ``heldout_log_likelihood``
    :param bfgs_model: optional ``DiscreteTimeStandardHawkesModel``
    :param sgd_models: optional list of models, one score per model
    :param gibbs_samples: optional sequence of Gibbs samples; only the
        second half is scored, one value per sample
    :param vb_models: optional list of mean-field models (100 draws each)
    :param svi_models: optional list of mean-field models (1 draw each)
    :param dt: bin width handed to ``convert_discrete_to_continuous`` when
        scoring the Gibbs samples (defaults to the former constant 0.02)
    :return: dict mapping 'homog', 'true', 'bfgs', 'sgd', 'gibbs', 'vb' and
        'svi' to the scores of the models that were supplied
    """
    plls = {}

    # Homogeneous baseline: Poisson log likelihood with constant rates
    T = S_train.shape[0]
    T_test = S_test.shape[0]
    lam_homog = S_train.sum(axis=0) / float(T)
    plls['homog'] = 0
    plls['homog'] += -gammaln(S_test + 1).sum()
    plls['homog'] += (-lam_homog * T_test).sum()
    plls['homog'] += (S_test.sum(axis=0) * np.log(lam_homog)).sum()

    if true_model is not None:
        plls['true'] = true_model.heldout_log_likelihood(S_test)

    if bfgs_model is not None:
        assert isinstance(bfgs_model, DiscreteTimeStandardHawkesModel)
        plls['bfgs'] = bfgs_model.heldout_log_likelihood(S_test)

    if sgd_models is not None:
        assert isinstance(sgd_models, list)

        plls['sgd'] = np.zeros(len(sgd_models))
        for i, sgd_model in enumerate(sgd_models):
            # Store one score per model instead of overwriting the array
            plls['sgd'][i] = sgd_model.heldout_log_likelihood(S_test)

    if gibbs_samples is not None:
        print("Computing pred ll for Gibbs")
        # Score only the second half of the samples
        offset = len(gibbs_samples) // 2
        S_ct, C_ct, T = convert_discrete_to_continuous(S_test, dt)

        plls['gibbs'] = np.array([
            s.heldout_log_likelihood(S_ct, C_ct, T)
            for s in gibbs_samples[offset:]
        ])

    if vb_models is not None:
        print("Computing pred ll for VB")
        # Log of the average predicted likelihood over 100 draws per model
        plls['vb'] = _mean_field_predictive_ll(vb_models, S_test, 100)

    if svi_models is not None:
        print("Computing predictive likelihood for SVI models")
        # A single draw per SVI model
        plls['svi'] = _mean_field_predictive_ll(svi_models, S_test, 1)

    return plls
def fit_ct_network_hawkes_gibbs(S,
                                K,
                                C,
                                dt,
                                dt_max,
                                output_path,
                                standard_model=None,
                                N_samples=100):
    """
    Fit a continuous time network Hawkes model with Gibbs sampling while
    live-plotting the weights.

    If ``output_path + ".gibbs.pkl"`` exists it is unpickled and returned
    without touching the data. Otherwise the sampler runs for ``N_samples``
    iterations, pickles every sample to
    ``output_path + ".gibbs.itr%04d.pkl"`` and finally pickles
    ``(samples, timestamps)`` to ``output_path + ".gibbs.pkl"``.

    :param S: data passed to ``convert_discrete_to_continuous``
    :param K: number passed to the model constructor and used as the length
        of the ``c`` hyperparameter vector
    :param C: unused; kept for interface compatibility
    :param dt: second argument of ``convert_discrete_to_continuous``
    :param dt_max: forwarded to ``ContinuousTimeNetworkHawkesModel``
    :param output_path: prefix of all files read and written
    :param standard_model: if given, used to initialize the model via
        ``initialize_with_standard_model``
    :param N_samples: number of Gibbs iterations (default 100, the value
        that used to be hard-coded)
    :return: ``(samples, timestamps)``; ``timestamps`` holds one clock
        reading per iteration
    """
    results_path = output_path + ".gibbs.pkl"

    # Check for existing Gibbs results
    if os.path.exists(results_path):
        # Binary pickles must be read in binary mode.
        # NOTE: unpickling can execute arbitrary code; only load files
        # written by this function.
        with open(results_path, 'rb') as f:
            print("Loading Gibbs results from  %s" % (results_path,))
            (samples, timestamps) = cPickle.load(f)
        return samples, timestamps

    print("Fitting the data with a network Hawkes model using Gibbs sampling")

    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

    # The network hyperparameter v is set to kappa / E_W (= 50 here).
    # NOTE(review): presumably chosen so that Gamma(kappa, v) weights have
    # mean E_W -- confirm against the network model.
    E_W = 0.2
    kappa = 10.
    E_v = kappa / E_W
    network_hypers = {
        'C': 1,
        # Builtin int replaces the np.int alias removed from NumPy
        "c": np.zeros(K, dtype=int),
        "p": 0.25,
        "v": E_v,
        'allow_self_connections': False
    }

    test_model = \
        ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max,
                                         network_hypers=network_hypers)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    plt.ion()
    im = plot_network(test_model.weight_model.A,
                      test_model.weight_model.W,
                      vmax=0.025)
    plt.pause(0.001)

    # time.clock was removed in Python 3.8; process_time is used where it
    # exists and time.clock is the fallback on older interpreters.
    clock = getattr(time, "process_time", None) or time.clock

    # Gibbs sample
    samples = []
    lps = [test_model.log_probability()]
    timestamps = []
    for itr in range(N_samples):
        print("Iteration  %s \tLL:  %s" % (itr, lps[-1]))
        im.set_data(test_model.weight_model.W_effective)
        plt.pause(0.001)

        lps.append(test_model.log_probability())
        samples.append(test_model.resample_and_copy())
        timestamps.append(clock())

        print(test_model.network.p)

        # Save this sample (binary mode: protocol=-1 is a binary pickle)
        with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'wb') as f:
            cPickle.dump(samples[-1], f, protocol=-1)

    # Save the Gibbs samples
    with open(results_path, 'wb') as f:
        print("Saving Gibbs samples to  %s" % (results_path,))
        cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
def _mean_field_predictive_ll(models, S_test, N_samples):
    """
    Log of the average predictive likelihood for each mean-field model.

    Each model is resampled ``N_samples`` times with ``resample_from_mf``
    and scored on ``S_test``; the draws are combined per model as
    ``logsumexp(scores) - log(N_samples)``.

    :return: array with one entry per model (empty if ``models`` is empty)
    """
    if len(models) == 0:
        return np.zeros(0)

    # The first model's basis is used to preconvolve the data for all models
    F_test = models[0].basis.convolve_with_basis(S_test)

    sample_plls = np.zeros((len(models), N_samples))
    for i, model in enumerate(models):
        for j in range(N_samples):
            model.resample_from_mf()
            sample_plls[i, j] = model.heldout_log_likelihood(S_test, F=F_test)

    return -np.log(N_samples) + logsumexp(sample_plls, axis=1)


def compute_predictive_ll(
    S_test,
    S_train,
    true_model=None,
    bfgs_model=None,
    sgd_models=None,
    gibbs_samples=None,
    vb_models=None,
    svi_models=None,
    dt=0.02,
):
    """
    Compute the predictive log likelihood of ``S_test`` under each model.

    :param S_test: held-out count array that is scored
    :param S_train: training count array; only used to estimate the
        homogeneous baseline rate (column sums / number of rows)
    :param true_model: optional model scored with ``heldout_log_likelihood``
    :param bfgs_model: optional ``DiscreteTimeStandardHawkesModel``
    :param sgd_models: optional list of models, one score per model
    :param gibbs_samples: optional sequence of Gibbs samples; only the
        second half is scored, one value per sample
    :param vb_models: optional list of mean-field models (100 draws each)
    :param svi_models: optional list of mean-field models (1 draw each)
    :param dt: bin width handed to ``convert_discrete_to_continuous`` when
        scoring the Gibbs samples (defaults to the former constant 0.02)
    :return: dict with the keys "homog", "true", "bfgs", "sgd", "gibbs",
        "vb" and "svi" for the models that were supplied
    """
    plls = {}

    # Homogeneous baseline: Poisson log likelihood with constant rates
    T = S_train.shape[0]
    T_test = S_test.shape[0]
    lam_homog = S_train.sum(axis=0) / float(T)
    plls["homog"] = 0
    plls["homog"] += -gammaln(S_test + 1).sum()
    plls["homog"] += (-lam_homog * T_test).sum()
    plls["homog"] += (S_test.sum(axis=0) * np.log(lam_homog)).sum()

    if true_model is not None:
        plls["true"] = true_model.heldout_log_likelihood(S_test)

    if bfgs_model is not None:
        assert isinstance(bfgs_model, DiscreteTimeStandardHawkesModel)
        plls["bfgs"] = bfgs_model.heldout_log_likelihood(S_test)

    if sgd_models is not None:
        assert isinstance(sgd_models, list)

        plls["sgd"] = np.zeros(len(sgd_models))
        for i, sgd_model in enumerate(sgd_models):
            # Fill slot i; assigning to plls["sgd"] would drop earlier scores
            plls["sgd"][i] = sgd_model.heldout_log_likelihood(S_test)

    if gibbs_samples is not None:
        print("Computing pred ll for Gibbs")
        # Score only the second half of the samples
        offset = len(gibbs_samples) // 2
        S_ct, C_ct, T = convert_discrete_to_continuous(S_test, dt)

        plls["gibbs"] = np.array(
            [s.heldout_log_likelihood(S_ct, C_ct, T) for s in gibbs_samples[offset:]]
        )

    if vb_models is not None:
        print("Computing pred ll for VB")
        # Log of the average predicted likelihood over 100 draws per model
        plls["vb"] = _mean_field_predictive_ll(vb_models, S_test, 100)

    if svi_models is not None:
        print("Computing predictive likelihood for SVI models")
        # A single draw per SVI model
        plls["svi"] = _mean_field_predictive_ll(svi_models, S_test, 1)

    return plls
def fit_ct_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path, standard_model=None, N_samples=100):
    """
    Fit a continuous time network Hawkes model with Gibbs sampling while
    live-plotting the weights.

    An existing ``output_path + ".gibbs.pkl"`` is unpickled and returned
    without touching the data. Otherwise ``N_samples`` Gibbs iterations are
    run, each sample is pickled to ``output_path + ".gibbs.itr%04d.pkl"``,
    and ``(samples, timestamps)`` is pickled to
    ``output_path + ".gibbs.pkl"``.

    :param S: data passed to ``convert_discrete_to_continuous``
    :param K: number passed to the model constructor and used as the length
        of the ``c`` hyperparameter vector
    :param C: unused; kept for interface compatibility
    :param dt: second argument of ``convert_discrete_to_continuous``
    :param dt_max: forwarded to ``ContinuousTimeNetworkHawkesModel``
    :param output_path: prefix of all files read and written
    :param standard_model: if given, used to initialize the model via
        ``initialize_with_standard_model``
    :param N_samples: number of Gibbs iterations (default 100, the value
        that used to be hard-coded)
    :return: ``(samples, timestamps)``; ``timestamps`` holds one clock
        reading per iteration
    """
    results_path = output_path + ".gibbs.pkl"

    # Check for existing Gibbs results
    if os.path.exists(results_path):
        # protocol=-1 pickles are binary, so the file is read in "rb" mode.
        # NOTE: unpickling can execute arbitrary code; only load files
        # written by this function.
        with open(results_path, "rb") as f:
            print("Loading Gibbs results from  %s" % (results_path,))
            (samples, timestamps) = cPickle.load(f)
        return samples, timestamps

    print("Fitting the data with a network Hawkes model using Gibbs sampling")

    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)

    # The network hyperparameter v is set to kappa / E_W (= 50 here).
    # NOTE(review): presumably chosen so that Gamma(kappa, v) weights have
    # mean E_W -- confirm against the network model.
    E_W = 0.2
    kappa = 10.0
    E_v = kappa / E_W
    network_hypers = {
        "C": 1,
        # Builtin int replaces the np.int alias removed from NumPy
        "c": np.zeros(K, dtype=int),
        "p": 0.25,
        "v": E_v,
        "allow_self_connections": False,
    }

    test_model = ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, network_hypers=network_hypers)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    plt.ion()
    im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.025)
    plt.pause(0.001)

    # time.clock was removed in Python 3.8; process_time is used where it
    # exists and time.clock is the fallback on older interpreters.
    clock = getattr(time, "process_time", None) or time.clock

    # Gibbs sample
    samples = []
    lps = [test_model.log_probability()]
    timestamps = []
    for itr in range(N_samples):
        print("Iteration  %s \tLL:  %s" % (itr, lps[-1]))
        im.set_data(test_model.weight_model.W_effective)
        plt.pause(0.001)

        lps.append(test_model.log_probability())
        samples.append(test_model.resample_and_copy())
        timestamps.append(clock())

        print(test_model.network.p)

        # Save this sample (binary mode: protocol=-1 is a binary pickle)
        with open(output_path + ".gibbs.itr%04d.pkl" % itr, "wb") as f:
            cPickle.dump(samples[-1], f, protocol=-1)

    # Save the Gibbs samples
    with open(results_path, "wb") as f:
        print("Saving Gibbs samples to  %s" % (results_path,))
        cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
Beispiel #6
0
def fit_ct_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                model_args=None, standard_model=None,
                                N_samples=100, time_limit=8*60*60):
    """
    Fit a continuous time network Hawkes model with Gibbs sampling.

    If ``output_path`` already exists, the gzipped pickle stored there is
    loaded and returned right away without reading ``S`` or ``S_test``.
    Otherwise the data are converted with ``convert_discrete_to_continuous``,
    the sampler is run, and the results are written to ``output_path``.

    :param S: training data; ``S.shape[1]`` is passed to the model as ``K``
    :param S_test: held-out data, scored before sampling and after every
        iteration
    :param dt: second argument of ``convert_discrete_to_continuous``
    :param dt_max: forwarded to ``ContinuousTimeNetworkHawkesModel``
    :param output_path: path of the gzipped pickle used as cache / output
    :param model_args: optional dict of extra keyword arguments for the
        model constructor (``None`` means no extra arguments)
    :param standard_model: if given, used to initialize the model via
        ``initialize_with_standard_model``
    :param N_samples: maximum number of Gibbs iterations
    :param time_limit: stop once the summed ``resample_model`` durations
        (measured with ``time.time``) exceed this value
    :return: ``Results(samples, timestamps, lps, hlls)``. ``timestamps`` is
        the cumulative sum of the per-iteration durations (leading 0),
        ``hlls`` has one entry before sampling plus one per iteration, and
        ``lps`` holds only the initial log probability because the
        per-iteration update is disabled.
    """
    # Check for existing Gibbs results before doing any data conversion
    if os.path.exists(output_path):
        # NOTE: unpickling can execute arbitrary code; only point
        # output_path at files written by this function.
        with gzip.open(output_path, 'rb') as f:
            print("Loading Gibbs results from  %s" % (output_path,))
            return cPickle.load(f)

    print("Fitting the data with a continuous time network Hawkes model using Gibbs sampling")

    # A None default avoids sharing one dict object between calls
    if model_args is None:
        model_args = {}

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    test_model = \
        ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
    test_model.add_data(S_ct, C_ct, T)

    # Initialize with the standard model parameters
    if standard_model is not None:
        test_model.initialize_with_standard_model(standard_model)

    # Gibbs sample
    samples = []
    lps = [test_model.log_probability()]
    hlls = [test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)]
    times = [0]
    for _ in progprint_xrange(N_samples, perline=25):
        # Time only the model update, not the bookkeeping that follows
        tic = time.time()
        test_model.resample_model()
        times.append(time.time() - tic)

        samples.append(copy.deepcopy(test_model.get_parameters()))

        # Only the heldout log likelihood is tracked per iteration
        hlls.append(test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test))

        # Check if time limit has been exceeded
        if np.sum(times) > time_limit:
            break

    # Get cumulative timestamps
    timestamps = np.cumsum(times)
    lps = np.array(lps)
    hlls = np.array(hlls)

    # Make results object
    results = Results(samples, timestamps, lps, hlls)

    # Save the Gibbs samples
    with gzip.open(output_path, 'wb') as f:
        print("Saving Gibbs samples to  %s" % (output_path,))
        cPickle.dump(results, f, protocol=-1)

    return results