Beispiel #1
0
def sample_from_network_hawkes(C, K, T, dt, dt_max, B):
    """
    Build a "true" spike-and-slab network Hawkes model, plot its network,
    and draw a sample from it.

    :param C:       Side length of the square ``p`` and ``v`` hyperparameter
                    matrices (presumably the number of blocks -- confirm
                    against the model class)
    :param K:       Number of processes; also the length of the assignment
                    vector ``c``
    :param T:       Forwarded to ``true_model.generate``
    :param dt:      Forwarded to the model constructor
    :param dt_max:  Forwarded to the model constructor
    :param B:       Forwarded to the model constructor
    :return:        ``(S, true_model)`` where ``S`` is the spike count matrix
                    returned by ``generate``
    """
    # Create a true model.
    # p is 0.8 on the diagonal and 0 elsewhere; v is 10 on the diagonal
    # and 20 elsewhere.
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1 - np.eye(C))

    # Indices 0..9 get label 0, every later index gets label 1.
    # The builtin ``int`` is used because the ``np.int`` alias was removed
    # from NumPy; the two were always the same type.
    c = (np.arange(K) >= 10).astype(int)

    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C,
                                                            K=K,
                                                            dt=dt,
                                                            dt_max=dt_max,
                                                            B=B,
                                                            c=c,
                                                            p=p,
                                                            v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)

    # Sample from the true model (the second output is not returned)
    S, R = true_model.generate(T=T)

    # Return the spike count matrix
    return S, true_model
def fit_network_hawkes_gibbs(S, K, C, dt, dt_max,
                             output_path,
                             standard_model=None):

    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", 'r') as f:
            print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
            (samples, timestamps) = cPickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        # test_model = DiscreteTimeNetworkHawkesModelGammaMixture(C=C, K=K, dt=dt, dt_max=dt_max, B=B,
        #                                                         alpha=1.0, beta=1.0/20.0)
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/10.0,
                          'tau1': 1.0, 'tau0': 10.0,
                          'allow_self_connections': False}
        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max,
                                                                basis=test_basis,
                                                                network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in xrange(N_samples):
            if itr % 1 == 0:
                print "Iteration ", itr, "\tLL: ", lps[-1]
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            # lps.append(test_model.log_probability())
            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.clock())

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
def sample_from_network_hawkes(K, T, dt, dt_max, B):
    """
    Construct a spike-and-slab network Hawkes model with network
    hyperparameter ``p=0.1``, plot its network, and sample from it.

    :param K:       Forwarded to the model constructor
    :param T:       Forwarded to ``generate``
    :param dt:      Forwarded to the model constructor
    :param dt_max:  Forwarded to the model constructor
    :param B:       Forwarded to the model constructor
    :return:        ``(S, true_model)`` with ``S`` the spike count matrix
    """
    hypers = dict(p=0.1)
    model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, dt_max=dt_max, B=B, network_hypers=hypers)

    # Show the ground-truth network in interactive mode
    plt.ion()
    model.plot_network()

    # Draw the data; only the first output (spike counts) is handed back
    spike_counts, _ = model.generate(T=T)
    return spike_counts, model
Beispiel #4
0
def test_generate_statistics():
    K = 1
    T = 100
    dt = 1.0
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt)
    S, R = true_model.generate(T=T)

    E_N = np.trapz(R, dt * np.arange(T), axis=0)
    std_N = np.sqrt(E_N)
    N = S.sum(axis=0)

    assert np.all(N >= E_N - 3 * std_N), "N less than 3std below mean"
    assert np.all(N <= E_N + 3 * std_N), "N more than 3std above mean"

    print "Expected number of events: ", E_N
    print "Actual number of events:   ", S.sum(axis=0)
Beispiel #5
0
def test_compute_rate():
    K = 1
    T = 100
    dt = 1.0
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt)
    S, R = true_model.generate(T=T)

    print "Expected number of events: ", np.trapz(R, dt * np.arange(T), axis=0)
    print "Actual number of events:   ", S.sum(axis=0)

    print "Lambda0:  ", true_model.bias_model.lambda0
    print "W:        ", true_model.weight_model.W
    print ""

    R_test = true_model.compute_rate()
    assert np.allclose(R, R_test)
Beispiel #6
0
def generate_hawkes(edges):
    """
    UNDER CONSTRUCTION. Will need the package pyhawkes. Generates firing times
    of N individuals in a given network defined by edges.

    :param edges:  Iterable of pairs ``(source, target)``. The entries are
                   used directly as row/column indices into a K x K matrix,
                   where K is the number of distinct node ids, so ids are
                   assumed to be the integers ``0..K-1``.
    :return:       ``(ids, times)`` where ``times`` maps a node id to the
                   ascending list of time bins in which that node had at
                   least one event, and ``ids`` lists the nodes that fired.
                   Nodes that never fire do not appear.
    """
    from pyhawkes.models import DiscreteTimeNetworkHawkesModelSpikeAndSlab
    np.random.seed(1122334455)

    # K is the number of distinct nodes mentioned by any edge
    nodes = set()
    for edge in edges:
        nodes.add(edge[0])
        nodes.add(edge[1])
    K = len(nodes)

    # Each event induces impulse responses of length dt_max on connected nodes
    p = 0.25
    dt_max = 20
    network_hypers = {"p": p, "allow_self_connections": False}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max, network_hypers=network_hypers)

    # Overwrite the sampled network with the requested adjacency matrix
    # and a constant weight of 0.5 on every entry.
    A2 = np.zeros((K, K))
    for edge in edges:
        A2[edge[0]][edge[1]] = 1.0
    true_model.weight_model.A = A2
    true_model.weight_model.W = 0.5 * np.ones((K, K))

    Tmax = 50000
    S, R = true_model.generate(T=Tmax)

    # Collect, per node, every time bin with a positive count. The previous
    # version created the list on the first event but only appended on later
    # ones, silently dropping each node's first firing time.
    times = dict()
    for idi in range(K):
        for it in range(Tmax):
            if S[it][idi] > 0:
                times.setdefault(idi, []).append(it)

    # Bins are visited in increasing order, so each list is already sorted.
    ids = list(times.keys())

    return ids, times
Beispiel #7
0
def test_compute_rate():
    K = 1
    T = 100
    dt = 1.0
    network_hypers = {'c': np.zeros(K, dtype=np.int), 'p': 1.0, 'kappa': 10.0, 'v': 10*5.0}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt,
                                                            network_hypers=network_hypers)
    S,R = true_model.generate(T=T)

    print "Expected number of events: ", np.trapz(R, dt * np.arange(T), axis=0)
    print "Actual number of events:   ", S.sum(axis=0)

    print "Lambda0:  ", true_model.bias_model.lambda0
    print "W:        ", true_model.weight_model.W
    print ""

    R_test = true_model.compute_rate()
    assert np.allclose(R, R_test)
Beispiel #8
0
def sample_from_network_hawkes(C, K, T, dt, B):
    """
    Build a "true" spike-and-slab network Hawkes model, plot its network,
    and draw a sample from it.

    :param C:   Side length of the square ``p`` and ``v`` hyperparameter
                matrices (presumably the number of blocks -- confirm against
                the model class)
    :param K:   Number of processes; also the length of the assignment
                vector ``c``
    :param T:   Forwarded to ``true_model.generate``
    :param dt:  Forwarded to the model constructor
    :param B:   Forwarded to the model constructor
    :return:    ``(S, R, true_model)``: both outputs of ``generate`` followed
                by the model itself
    """
    # Create a true model.
    # p is 0.8 on the diagonal and 0 elsewhere; v is 10 on the diagonal
    # and 20 elsewhere.
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1 - np.eye(C))

    # Indices 0..9 get label 0, every later index gets label 1.
    # The builtin ``int`` is used because the ``np.int`` alias was removed
    # from NumPy; the two were always the same type.
    c = (np.arange(K) >= 10).astype(int)

    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C, K=K, dt=dt, B=B, c=c, p=p, v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)

    # Sample from the true model
    S, R = true_model.generate(T=T)

    # Return the spike count matrix, the second generate output, and the model
    return S, R, true_model
Beispiel #9
0
def test_generate_statistics():
    K = 1
    T = 100
    dt = 1.0
    network_hypers = {'c': np.zeros(K, dtype=np.int), 'p': 1.0, 'kappa': 10.0, 'v': 10*5.0}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt,
                                                            network_hypers=network_hypers)
    S,R = true_model.generate(T=T)

    E_N = np.trapz(R, dt * np.arange(T), axis=0)
    std_N = np.sqrt(E_N)
    N = S.sum(axis=0)

    assert np.all(N >= E_N - 3*std_N), "N less than 3std below mean"
    assert np.all(N <= E_N + 3*std_N), "N more than 3std above mean"

    print "Expected number of events: ", E_N
    print "Actual number of events:   ", S.sum(axis=0)
Beispiel #10
0
def fit_network_hawkes_gibbs_ss(S, K, C, B, dt, dt_max,
                                output_path, p,
                                standard_model=None):

    samples_and_timestamps = load_partial_results(output_path, typ="gibbs_ss")
    if samples_and_timestamps is not None:
        samples, timestamps = samples_and_timestamps


    else:
        print "Fitting the data with a spike and slab network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        network_hypers = {'C': C, 'alpha': 1.0, 'beta': 1.0/20.0, 'p': p,
                          'v': 5.0,'c': np.arange(C).repeat((K // C))}
        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, B=B,
                                                                network_hypers=network_hypers)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample
        N_samples = 1000
        samples = []
        lps = []
        timestamps = [time.clock()]
        for itr in xrange(N_samples):
            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.clock())

            print test_model.network.v

            if itr % 1 == 0:
                print "Iteration ", itr, "\t LP: ", lps[-1]

            # Save this sample
            with open(output_path + ".gibbs_ss.itr%04d.pkl" % itr, 'w') as f:
                cPickle.dump((samples[-1], timestamps[-1]-timestamps[0]), f, protocol=-1)

        # Save the Gibbs timestamps
        timestamps = np.array(timestamps)
        with open(output_path + ".gibbs_ss.timestamps.pkl", 'w') as f:
            print "Saving spike and slab Gibbs samples to ", (output_path + ".gibbs_ss.timestamps.pkl")
            cPickle.dump(timestamps, f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs_ss.pkl", 'w') as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs_ss.pkl")
            cPickle.dump((samples, timestamps[1:] - timestamps[0]), f, protocol=-1)

    return samples, timestamps
def test_normalization():
    """
    Check that each basis function of a default model integrates to one,
    i.e. ``dt`` times its column sum is (approximately) 1.
    """
    dt = 1.0
    dt_max = 10.0
    model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=1,
                                                       dt=dt,
                                                       dt_max=dt_max)

    basis = model.basis.basis
    volume = dt * basis.sum(axis=0)

    # The leftover ``pdb.set_trace()`` breakpoint was removed: it halted
    # every run and made the test unusable in non-interactive test runs.
    assert np.allclose(volume, 1.0)
Beispiel #12
0
def test_sbm_mf(seed=None):
    """
    Run mean field updates of the network model on a fixed, known network
    and plot how well the sampled block assignments recover the true ones.

    A spike-and-slab model supplies the "true" adjacency/weight matrices and
    block assignments. A gamma-mixture model is initialized from those
    matrices and its network is then updated for 50 iterations; at each
    iteration an assignment is sampled and scored against the truth.

    :param seed: Seed for ``np.random``; drawn at random when None.
    :return: None. Results are printed and plotted.
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 5
    K = 50
    T = 1000
    dt = 1.0
    B = 3
    # 0.4 on the diagonal, 0.05 off the diagonal
    p = 0.4 * np.eye(C) + (0.05) * (1-np.eye(C))

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K, 'p': p}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # Permutation that orders the nodes by their true assignment, used to
    # rearrange rows/columns in the plots below
    c = true_model.network.c
    perm = np.argsort(c)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    test_network_hypers = {'C': C, 'beta': 1.0/K, 'tau0': 0.5, 'tau1': 0.5}
    test_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, dt=dt, B=B,
                                                            network_hypers=test_network_hypers)
    # The test model's weights are set from the true A and W; only the
    # network is updated in the loop below
    test_model.weight_model.initialize_from_gibbs(true_model.weight_model.A,
                                                  true_model.weight_model.W)

    # Plot the block probabilities
    plt.figure()
    im = plt.imshow(test_model.network.mf_m[perm,:],
                    interpolation="none", cmap="Greys",
                    aspect=float(C)/K)
    plt.xlabel('C')
    plt.ylabel('K')
    plt.show()
    plt.pause(0.001)

    # Run mean field updates for the SBM given a fixed network
    N_iters = 50
    c_samples = []
    vlbs = []
    for itr in xrange(N_iters):
        if itr % 5 == 0:
            print "Iteration: ", itr

        # Update the plot
        im.set_data(test_model.network.mf_m[perm,:])
        plt.pause(0.001)


        # Resample from meanfield distribution
        test_model.network.resample_from_mf()
        c_samples.append(copy.deepcopy(test_model.network.c))
        vlbs.append(test_model.network.get_vlb() + test_model.weight_model.get_vlb())

        if itr > 0:

            # Only warn (do not fail) when the bound drops by more than 1e-3
            if vlbs[-1] - vlbs[-2] < -1e-3:
                print "VLBS are not increasing"
                print np.array(vlbs)
                # import pdb; pdb.set_trace()
                # raise Exception("VLBS are not increasing!")


        # Take a mean field step
        test_model.network.meanfieldupdate(test_model.weight_model)

    plt.ioff()

    # Convert the per-iteration records to arrays for printing and plotting
    c_samples = np.array(c_samples)
    vlbs = np.array(vlbs)

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    # (note: the loop variable reuses the name ``c`` from above)
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    # Red: adjusted mutual information; blue: adjusted Rand score
    plt.figure()
    plt.plot(np.arange(N_iters), amis, '-r')
    plt.plot(np.arange(N_iters), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")

    plt.figure()
    plt.plot(np.arange(N_iters), vlbs)
    plt.xlabel("Iteration")
    plt.ylabel("VLB")

    plt.show()
Beispiel #13
0
if __name__ == "__main__":
    # Geweke-style run: create a discrete time Hawkes model, generate data
    # from it, then alternate between resampling the model given the data
    # and regenerating the data given the model.
    K = 1
    T = 50
    dt = 1.0
    dt_max = 3.0
    # network_hypers = {'C': 1, 'p': 0.5, 'kappa': 3.0, 'alpha': 3.0, 'beta': 1.0/20.0}
    # Builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    network_hypers = {'c': np.zeros(K, dtype=int), 'p': 0.5, 'kappa': 10.0, 'v': 10*3.0}
    # NOTE(review): bkgd_hypers is defined but never passed to the model --
    # confirm whether it was meant to be.
    bkgd_hypers = {"alpha": 1., "beta": 10.}
    model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max,
                                                       weight_hypers={"parallel_resampling": False},
                                                       network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step: discard the current dataset and draw a fresh one from
        # the just-resampled model. Without the regeneration the data list is
        # empty after the first iteration and the next pop() raises
        # IndexError.
        model.data_list.pop()
        model.generate(T=T)
Beispiel #14
0
def generate_synthetic_data(seed=None):
    """
    Create a discrete time Hawkes model, sample training and test data from
    it, and save both to gzipped pickles under ``data/synthetic``.

    The training file is ``synthetic_K{K}_C{C}_T{T}.pkl.gz`` and holds
    ``(S, true_model)``; the test file is
    ``synthetic_test_K{K}_C{C}_T{T_test}.pkl.gz`` and holds
    ``(S_test, true_model)``.

    :param seed: Seed for ``np.random``; drawn at random when None.
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    # Create a true model
    # Larger v (weight scale) implies smaller weights

    T_test = 1000

    # The commented blocks below are alternative presets; exactly one preset
    # should be active at a time.

    # Debugging network:
    # C = 1
    # K = 4
    # T = 1000
    # dt = 1.0
    # B = 3
    # p = 0.5
    # kappa = 3.0
    # v = kappa * 5.0
    # c = np.zeros(K, dtype=int)

    # Small network:
    # Seed: 1957629166
    # C = 4
    # K = 20
    # T = 10000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.9 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (5.0 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network:
    # Seed: 2723361959
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network 2:
    # Seed = 3848328624
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 2.0
    # c = np.arange(C).repeat((K // C))
    # p = 0.4 * np.eye(C) + 0.01 * (1-np.eye(C))
    # v = kappa * (5 * np.eye(C) + 5.0 * (1-np.eye(C)))

    # Medium network, one cluster
    # Seed: 3848328624
    C = 1
    K = 50
    T = 100000
    dt = 1.0
    B = 3
    p = 0.08
    kappa = 3.0
    v = kappa * 5.0
    # Builtin ``int`` replaces the ``np.int`` alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    c = np.zeros(K, dtype=int)

    # Large network:
    # Seed = 2467634490
    # C = 5
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.4 * np.eye(C) + 0.025 * (1-np.eye(C))
    # v = kappa * (10 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Large network 2:
    # Seed =
    # C = 10
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Extra large network:
    # Seed: 2327447870
    # C = 20
    # K = 1000
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.25 * np.eye(C) + 0.0025 * (1-np.eye(C))
    # v = kappa * (15 * np.eye(C) + 30.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Create the model with these parameters
    # NOTE(review): network_hypers (and therefore C and c) is not passed to
    # the model below, which receives an explicit ``network`` instead.
    network_hypers = {'C': C, 'kappa': kappa, 'c': c, 'p': p, 'v': v}

    # Create a simple network
    from pyhawkes.internals.network import ErdosRenyiFixedSparsity
    network = ErdosRenyiFixedSparsity(K, p, kappa, v=v)

    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K,
                                                            dt=dt,
                                                            B=B,
                                                            network=network)

    assert true_model.check_stability()

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A, true_model.weight_model.W)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T, keep=False, print_interval=50)

    # Pickle and save the data
    out_dir = os.path.join('data', "synthetic")
    out_name = 'synthetic_K%d_C%d_T%d.pkl.gz' % (K, C, T)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'w') as f:
        print("Saving output to ", out_path)
        pickle.dump((S, true_model), f, protocol=-1)

    # Sample test data
    S_test, _ = true_model.generate(T=T_test, keep=False)

    # Pickle and save the data
    out_dir = os.path.join('data', "synthetic")
    out_name = 'synthetic_test_K%d_C%d_T%d.pkl.gz' % (K, C, T_test)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'w') as f:
        print("Saving output to ", out_path)
        pickle.dump((S_test, true_model), f, protocol=-1)
def demo(K=3, T=1000, dt_max=20, p=0.25):
    """
    Simulate data from a spike-and-slab network Hawkes model, then fit a
    second model of the same kind to that data with Gibbs sampling.

    :param K:       Number of nodes
    :param T:       Number of time bins to simulate
    :param dt_max:  Number of future time bins an event can influence
    :param p:       Sparsity of network
    :return:        None; the samples are handed to ``analyze_samples``
    """
    # Both models are built from the very same hyperparameter dict
    hypers = {"p": p, "allow_self_connections": False}

    # ---- Ground truth: build, verify stability, simulate, plot ----
    truth = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max, network_hypers=hypers)
    assert truth.check_stability()

    S, R = truth.generate(T=T, keep=True, print_interval=50)

    plt.ion()
    true_figure, _ = truth.plot(color="#377eb8", T_slice=(0, 100))

    # ---- Model to be fitted: same configuration, given the sampled data ----
    fitted = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max, network_hypers=hypers)
    fitted.add_data(S)

    test_figure, plot_handles = fitted.plot(color="#e41a1c", T_slice=(0, 100))

    # ---- Gibbs sampling, refreshing the plot after every sweep ----
    n_sweeps = 100
    log_probs = []
    draws = []
    for sweep in range(n_sweeps):
        print("Gibbs iteration ", sweep)
        fitted.resample_model()
        log_probs.append(fitted.log_probability())
        draws.append(fitted.copy_sample())

        fitted.plot(handles=plot_handles)

    # ---- Post-processing ----
    analyze_samples(truth, draws, log_probs)
Beispiel #16
0
    def execute_toy(self,mode="discrete",dt_max=3,N_samples=1000,network_priors={"p": 1.0, "allow_self_connections": False}):
        #np.random.seed(0)
        if mode == 'discrete':
            test_model1 = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=self.K, dt_max=dt_max,
                        network_hypers=network_priors)
            test_model1.add_data(self.data)
            test_model1.initialize_with_standard_model(None)
        elif mode == 'continuous':
            test_model = ContinuousTimeNetworkHawkesModel(self.K, dt_max=dt_max,
                                                            network_hypers=network_hypers)
            test_model.add_data(self.data,self.labels)

        ###########################################################
        # Fit the test model with Gibbs sampling
        ###########################################################
        samples = []
        lps = []
        #for itr in xrange(N_samples):
        #    test_model1.resample_model()
        #    lps.append(test_model1.log_probability())
        #    samples.append(test_model1.copy_sample())

        test_model = DiscreteTimeStandardHawkesModel(K=self.K, dt_max=dt_max, allow_self_connections= False)
        #test_model.initialize_with_gibbs_model(test_model1)
        test_model.add_data(self.data)
        test_model.fit_with_bfgs()

        impulse =  test_model1.impulse_model.impulses
        responses = {}
        #for i in range(3):
        #    responses[str(i)] = []
        #    for j in range(3):
        #        responses[str(i)].append({"key":"response: process "+str(i)+" to "+str(j),"values":[{"x":idx,"y":k} for idx,k in enumerate(impulse[:,i,j])]})
        #    with open('/Users/PauKung/hawkes_demo/webapp/static/data/response'+str(i)+'.json','w') as outfile:
        #        json.dump({"out":responses[str(i)]},outfile)
        # calculate convolved basis
        rr = test_model.basis.convolve_with_basis(np.ones((dt_max*2,self.K)))
        impulse = np.sum(rr, axis=2)
        impulse[dt_max:,:] = 0
        for i in range(3):
            responses[str(i)] = {"key":"response: process "+str(i),"values":[{"x":idx,"y":k} for idx,k in enumerate(impulse[:,i])]}
            with open('/Users/PauKung/hawkes_demo/webapp/static/data/response'+str(i)+'.json','w') as outfile:
                json.dump({"out":responses[str(i)]},outfile)

        rates = test_model.compute_rate()#self.compute_rate(test_model,mode,dt_max)
        inferred_rate = {}
        S,F = test_model.data_list[0]
        print F
        for i in range(3):
            inferred_rate[str(i)] = []
            inferred_rate[str(i)].append({"key":"background",
                "values":[[j,test_model.bias[i]] for j in range(self.T)]})
                #"values":[[j,test_model1.bias_model.lambda0[i]] for j in range(self.T)]})
        for i in range(3):
            inferred_rate[str(i)].append({"key":"influence: process"+str(i),
                "values":[[idx,j-test_model.bias[i]] for idx,j in enumerate(rates[:,i])]})
            with open('/Users/PauKung/hawkes_demo/webapp/static/data/infer'+str(i)+'.json','w') as outfile:
                json.dump({"out":inferred_rate[str(i)]},outfile)
        # output response function diagram (K x K timeseries)
        #plt.subplot(3,3,1)
        #for i in range(3):
        #    for j in range(3):
        #        plt.subplot(3,3,3*i+(j+1))
        #        plt.plot(np.arange(4),impulse[:,i,j],color="#377eb8", lw=2)
        #plt.savefig(fpath+"response_fun.png",transparent=True)
        # output background bias diagram (K x 1 timeseries)
        #plt.subplot(3,1,1)
        #for i in range(3):
        #    plt.subplot(3,1,i+1)
        #    plt.plot(np.arange(4),[test_model.bias_model.lambda0[i] for j in range(4)],color="#333333",lw=2)
        #plt.savefig(fpath+"bias.png",transparent=True)
        # output inferred rate diagram (K x 1 timeseries)
        #test_figure, test_handles = test_model.plot(color="#e41a1c", T_slice=(0,self.T))
        #plt.savefig(fpath+"inferred_rate.png",transparent=True)
        print test_model.W
        return test_model.W, inferred_rate, responses
Beispiel #17
0
def demo(K=3, T=1000, dt_max=20, p=0.25):
    """

    :param K:       Number of nodes
    :param T:       Number of time bins to simulate
    :param dt_max:  Number of future time bins an event can influence
    :param p:       Sparsity of network
    :return:
    """
    ###########################################################
    # Generate synthetic data
    ###########################################################
    network = ErdosRenyiFixedSparsity(K, p, v=1., allow_self_connections=False)
    bkgd_hypers = {"alpha": 1.0, "beta": 20.0}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max, bkgd_hypers=bkgd_hypers, network=network)
    A_true = np.zeros((K,K))
    A_true[0,1] = A_true[0,2] = 1
    W_true = np.zeros((K,K))
    W_true[0,1] = W_true[0,2] = 1.0
    true_model.weight_model.A = A_true
    true_model.weight_model.W = W_true
    true_model.bias_model.lambda0[0] = 0.2
    assert true_model.check_stability()

    # Sample from the true model
    S,R = true_model.generate(T=T, keep=True, print_interval=50)

    plt.ion()
    true_figure, _ = true_model.plot(color="#377eb8", T_slice=(0,100))

    # Save the true figure
    true_figure.savefig("gifs/true.gif")

    ###########################################################
    # Create a test spike and slab model
    ###########################################################
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max, network=network)

    test_model.add_data(S)

    # Initialize plots
    test_figure, test_handles = test_model.plot(color="#e41a1c", T_slice=(0,100))
    test_figure.savefig("gifs/test0.gif")

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 100
    samples = []
    lps = []
    for itr in xrange(N_samples):
        print "Gibbs iteration ", itr
        test_model.resample_model()
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())

        # Update plots
        test_model.plot(handles=test_handles)
        test_figure.savefig("gifs/test%d.gif" % (itr+1))
def demo(seed=None):
    """
    Load a saved synthetic dataset, initialize from a BFGS-fitted standard
    Hawkes model, and fit a spike-and-slab model with a stochastic block
    model network by Gibbs sampling.

    :param seed: Seed for ``np.random``; drawn at random when None.
    :return: None; the samples are handed to ``analyze_samples``
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    # ---- Load some example data ----
    # See data/synthetic/generate.py to create more.
    data_path = os.path.join("data", "synthetic",
                             "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = pickle.load(f)

    # Dimensions and discretization are taken from the data and saved model
    T = S.shape[0]
    K, B = true_model.K, true_model.B
    dt, dt_max = true_model.dt, true_model.dt_max

    # ---- Initialize with MAP estimation on a standard Hawkes model ----
    init_model = None
    init_with_map = True
    if init_with_map:
        init_len = T
        print("Initializing with BFGS on first ", init_len, " time bins.")
        init_model = DiscreteTimeStandardHawkesModel(
            K=K, dt=dt, dt_max=dt_max, B=B, alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()

    # ---- Create a test spike and slab model ----
    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    for hidden_key in ('c', 'v', 'm'):
        network_hypers[hidden_key] = None
    test_network = StochasticBlockModel(K=K, **network_hypers)

    # Every other hyperparameter group is shared with the true model
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, dt_max=dt_max, B=B,
        basis_hypers=true_model.basis_hypers,
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers,
        network=test_network)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    # ---- Fit the test model with Gibbs sampling ----
    # Each iteration records the current state first and resamples afterwards,
    # so sample 0 is the initialization.
    n_sweeps = 50
    plot_every = 1
    samples = []
    lps = []
    for itr in range(n_sweeps):
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())

        print("")
        print("Gibbs iteration ", itr)
        print("LP: ", lps[-1])

        test_model.resample_model()

        # Update plot
        if itr % plot_every == 0:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    # ---- Analyze the samples ----
    analyze_samples(true_model, init_model, samples, lps)
Beispiel #19
0
def demo(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    ###########################################################
    # Load some example data.
    # See data/synthetic/generate.py to create more.
    ###########################################################
    data_path = os.path.join("data", "synthetic", "synthetic_K20_C4_T10000.pkl.gz")
    with gzip.open(data_path, 'r') as f:
        S, true_model = cPickle.load(f)

    T      = S.shape[0]
    K      = true_model.K
    B      = true_model.B
    dt     = true_model.dt
    dt_max = true_model.dt_max

    ###########################################################
    # Initialize with MAP estimation on a standard Hawkes model
    ###########################################################
    init_with_map = True
    if init_with_map:
        init_len   = T
        print "Initializing with BFGS on first ", init_len, " time bins."
        init_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, dt_max=dt_max, B=B,
                                                     alpha=1.0, beta=1.0)
        init_model.add_data(S[:init_len, :])

        init_model.initialize_to_background_rate()
        init_model.fit_with_bfgs()
    else:
        init_model = None

    ###########################################################
    # Create a test spike and slab model
    ###########################################################

    # Copy the network hypers.
    # Give the test model p, but not c, v, or m
    network_hypers = true_model.network_hypers.copy()
    network_hypers['c'] = None
    network_hypers['v'] = None
    network_hypers['m'] = None
    test_network = StochasticBlockModel(K=K, **network_hypers)
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, B=B,
                                                            basis_hypers=true_model.basis_hypers,
                                                            bkgd_hypers=true_model.bkgd_hypers,
                                                            impulse_hypers=true_model.impulse_hypers,
                                                            weight_hypers=true_model.weight_hypers,
                                                            network=test_network)
    test_model.add_data(S)
    # F_test = test_model.basis.convolve_with_basis(S_test)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    # Initialize plots
    ln, im_net, im_clus = initialize_plots(true_model, test_model, S)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 50
    samples = []
    lps = []
    # plls = []
    for itr in xrange(N_samples):
        lps.append(test_model.log_probability())
        # plls.append(test_model.heldout_log_likelihood(S_test, F=F_test))
        samples.append(test_model.copy_sample())

        print ""
        print "Gibbs iteration ", itr
        print "LP: ", lps[-1]

        test_model.resample_model()

        # Update plot
        if itr % 1 == 0:
            update_plots(itr, test_model, S, ln, im_clus, im_net)

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, init_model, samples, lps)
Beispiel #20
0
def demo(K=3, T=1000, dt_max=20, p=0.25):
    """

    :param K:       Number of nodes
    :param T:       Number of time bins to simulate
    :param dt_max:  Number of future time bins an event can influence
    :param p:       Sparsity of network
    :return:
    """
    ###########################################################
    # Generate synthetic data
    ###########################################################
    network_hypers = {"p": p, "allow_self_connections": False}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max,
        network_hypers=network_hypers)
    assert true_model.check_stability()

    # Sample from the true model
    S,R = true_model.generate(T=T, keep=True, print_interval=50)

    plt.ion()
    true_figure, _ = true_model.plot(color="#377eb8", T_slice=(0,100))

    ###########################################################
    # Create a test spike and slab model
    ###########################################################
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max,
        network_hypers=network_hypers)

    test_model.add_data(S)

    # Initialize plots
    test_figure, test_handles = test_model.plot(color="#e41a1c", T_slice=(0,100))

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 100
    samples = []
    lps = []
    for itr in xrange(N_samples):
        print "Gibbs iteration ", itr
        test_model.resample_model()
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())

        # Update plots
        test_model.plot(handles=test_handles)

    ###########################################################
    # Analyze the samples
    ###########################################################
    analyze_samples(true_model, samples, lps)
    """
    K = 1
    T = 50
    dt = 1.0
    dt_max = 3.0
    # network_hypers = {'C': 1, 'p': 0.5, 'kappa': 3.0, 'alpha': 3.0, 'beta': 1.0/20.0}
    network_hypers = {
        'c': np.zeros(K, dtype=np.int),
        'p': 0.5,
        'kappa': 10.0,
        'v': 10 * 3.0
    }
    bkgd_hypers = {"alpha": 1., "beta": 10.}
    model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K,
        dt=dt,
        dt_max=dt_max,
        weight_hypers={"parallel_resampling": False},
        network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_likelihood())

        # Geweke step
Beispiel #22
0
def fit_spikeslab_network_hawkes_gibbs(S,
                                       S_test,
                                       dt,
                                       dt_max,
                                       output_path,
                                       model_args=None,
                                       standard_model=None,
                                       N_samples=100,
                                       time_limit=8 * 60 * 60):
    """
    Fit a spike-and-slab network Hawkes model with Gibbs sampling.

    If ``output_path`` already exists, the gzip-pickled results stored
    there are loaded and returned without fitting anything.

    :param S:              training data; must unpack as ``T, K = S.shape``
    :param S_test:         held-out data scored after every sweep
    :param dt:             passed through to the model constructor
    :param dt_max:         passed through to the model constructor
    :param output_path:    gzip-pickle file used as cache and as output
    :param model_args:     extra keyword arguments for the model
                           constructor (None means no extra arguments)
    :param standard_model: optional model handed to
                           ``initialize_with_standard_model``
    :param N_samples:      maximum number of Gibbs sweeps
    :param time_limit:     stop once the summed sweep time (seconds)
                           exceeds this value
    :return: the loaded object, or ``Results(samples, timestamps, lps,
             hlls)``. ``lps`` only holds the initial log probability
             because the per-sweep update is disabled below.
    """
    # Unpacking also validates that S is two-dimensional
    _, K = S.shape

    # A None default avoids sharing one mutable dict across calls
    if model_args is None:
        model_args = {}

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print("Loading Gibbs results from ", output_path)
            # NOTE: unpickling runs arbitrary code; only load trusted files
            results = pickle.load(f)
    else:
        print(
            "Fitting the data with a network Hawkes model using Gibbs sampling"
        )

        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K,
                                                                dt=dt,
                                                                dt_max=dt_max,
                                                                **model_args)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Convolve the test data with the basis once, reused every sweep
        F_test = test_model.basis.convolve_with_basis(S_test)

        # Gibbs sample; entry 0 of lps/hlls/times is the initial state
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=10):
            # Time only the sweep and the parameter copy
            tic = time.time()
            test_model.resample_model()
            samples.append(copy.deepcopy(test_model.get_parameters()))
            times.append(time.time() - tic)

            # Compute heldout log likelihood (log probability is disabled)
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test, F=F_test))

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'w') as f:
            print("Saving Gibbs samples to ", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
Beispiel #23
0
def test_gibbs_sbm(seed=None):
    """
    Check how well Gibbs sampling of the block model recovers the clusters.

    A true model is built on a StochasticBlockModel with a fixed cluster
    assignment. A second model with a fresh StochasticBlockModel then has
    only its network resampled, conditioned on the true model's A and W.
    The adjusted mutual information and adjusted Rand scores of every
    sampled assignment against the true one are plotted per iteration.

    :param seed: seed for numpy's global RNG; a random one is drawn if None
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    C = 2
    K = 100
    # Contiguous clusters of near-equal size. The repeat count must be an
    # int: np.ceil returns a float, which ndarray.repeat refuses to cast.
    c = np.arange(C).repeat(int(np.ceil(K / float(C))))[:K]
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    true_p = np.random.rand(C, C) * 0.25
    true_network = StochasticBlockModel(K, C, c=c, p=true_p, v=10.0)
    true_model = \
        DiscreteTimeNetworkHawkesModelSpikeAndSlab(
                K=K, dt=dt, B=B, network=true_network)

    S, R = true_model.generate(T)

    # Plot the true network
    plt.ion()
    true_model.plot_adjacency_matrix()
    plt.pause(0.001)

    # Make a new model for inference
    test_network = StochasticBlockModel(K, C, beta=1. / K)
    test_model = \
        DiscreteTimeNetworkHawkesModelSpikeAndSlab(
                K=K, dt=dt, B=B, network=test_network)
    test_model.add_data(S)

    # Gibbs sample; the state is recorded before each resampling step
    N_samples = 100
    c_samples = []
    lps = []
    for itr in progprint_xrange(N_samples):
        c_samples.append(test_network.c.copy())
        lps.append(test_model.log_probability())

        # Resample the network only, given the true adjacency and weights
        test_model.network.resample(
            (true_model.weight_model.A, true_model.weight_model.W))

    c_samples = np.array(c_samples)
    plt.ioff()

    # Show the true assignment next to the last ten sampled assignments
    print("True c: ", true_model.network.c)
    print("Test c: ", c_samples[-10:, :])

    # Score every sampled clustering against the true one
    amis = []
    arss = []
    for c_sample in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c_sample))
        arss.append(adjusted_rand_score(true_model.network.c, c_sample))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
def generate_synthetic_data(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    # Create a true model
    # Larger v (weight scale) implies smaller weights

    T_test=1000

    # Debugging network:
    # C = 1
    # K = 4
    # T = 1000
    # dt = 1.0
    # B = 3
    # p = 0.5
    # kappa = 3.0
    # v = kappa * 5.0
    # c = np.zeros(K, dtype=np.int)

    # Small network:
    # Seed: 1957629166
    # C = 4
    # K = 20
    # T = 10000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.9 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (5.0 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium network:
    # Seed: 2723361959
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Medium netowrk 2:
    # Seed = 3848328624
    # C = 5
    # K = 50
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 2.0
    # c = np.arange(C).repeat((K // C))
    # p = 0.4 * np.eye(C) + 0.01 * (1-np.eye(C))
    # v = kappa * (5 * np.eye(C) + 5.0 * (1-np.eye(C)))

    # Medium netowrk, one cluster
    # Seed: 3848328624
    C = 1
    K = 50
    T = 100000
    dt = 1.0
    B = 3
    p = 0.08
    kappa = 3.0
    v = kappa * 5.0
    c = np.zeros(K, dtype=np.int)

    # Large network:
    # Seed = 2467634490
    # C = 5
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.4 * np.eye(C) + 0.025 * (1-np.eye(C))
    # v = kappa * (10 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Large network 2:
    # Seed =
    # C = 10
    # K = 100
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.75 * np.eye(C) + 0.05 * (1-np.eye(C))
    # v = kappa * (9 * np.eye(C) + 25.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))

    # Extra large network:
    # Seed: 2327447870
    # C = 20
    # K = 1000
    # T = 100000
    # dt = 1.0
    # B = 3
    # kappa = 3.0
    # p = 0.25 * np.eye(C) + 0.0025 * (1-np.eye(C))
    # v = kappa * (15 * np.eye(C) + 30.0 * (1-np.eye(C)))
    # c = np.arange(C).repeat((K // C))


    # Create the model with these parameters
    network_hypers = {'C': C, 'kappa': kappa, 'c': c, 'p': p, 'v': v}

    # Create a simple network
    from pyhawkes.internals.network import ErdosRenyiFixedSparsity
    network = ErdosRenyiFixedSparsity(K, p, kappa, v=v)

    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network=network)

    assert true_model.check_stability()

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W)
    plt.pause(0.001)

    # Sample from the true model
    S,R = true_model.generate(T=T, keep=False, print_interval=50)

    # Pickle and save the data
    out_dir  = os.path.join('data', "synthetic")
    out_name = 'synthetic_K%d_C%d_T%d.pkl.gz' % (K,C,T)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'w') as f:
        print "Saving output to ", out_path
        cPickle.dump((S, true_model), f, protocol=-1)

    # Sample test data
    S_test,_ = true_model.generate(T=T_test, keep=False)

    # Pickle and save the data
    out_dir  = os.path.join('data', "synthetic")
    out_name = 'synthetic_test_K%d_C%d_T%d.pkl.gz' % (K,C,T_test)
    out_path = os.path.join(out_dir, out_name)
    with gzip.open(out_path, 'w') as f:
        print "Saving output to ", out_path
        cPickle.dump((S_test, true_model), f, protocol=-1)
Beispiel #25
0
def test_gibbs_sbm(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 10
    K = 100
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    network_hypers = {'C': C, 'beta': 1.0/K}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # S,R = true_model.generate(T=T)
    c = true_model.network.c
    perm = np.argsort(c)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)


    # Make a new model for inference
    network_hypers = {'C': C, 'beta': 1.0/K}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, B=B,
                                                            network_hypers=network_hypers)
    # test_model.add_data(S)

    # Gibbs sample
    N_samples = 10
    samples = []
    lps = []
    for itr in xrange(N_samples):
        if itr % 5 == 0:
            print "Iteration: ", itr
        samples.append(copy.deepcopy(test_model.get_parameters()))

        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample((true_model.weight_model.A,
                                     true_model.weight_model.W))

    plt.ioff()

    # Compute sample statistics for second half of samples
    c_samples       = np.array([c for _,_,_,_,c,_,_,_ in samples])

    print "True c: ", true_model.network.c
    print "Test c: ", c_samples[-10:, :]

    # Compute the adjusted mutual info score of the clusterings
    amis = []
    arss = []
    for c in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c))
        arss.append(adjusted_rand_score(true_model.network.c, c))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
Beispiel #26
0
def fit_spikeslab_network_hawkes_gibbs(S, S_test, dt, dt_max, output_path,
                                       model_args={}, standard_model=None,
                                       N_samples=100, time_limit=8*60*60):

    T,K = S.shape

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print "Loading Gibbs results from ", output_path
            results = cPickle.load(f)
    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, **model_args)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # TODO: Precompute F_test
        F_test = test_model.basis.convolve_with_basis(S_test)


        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test)]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=10):
            # Update the model
            tic = time.time()
            test_model.resample_model()
            samples.append(copy.deepcopy(test_model.get_parameters()))
            times.append(time.time() - tic)

            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test, F=F_test))

            # # Save this sample
            # with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'w') as f:
            print "Saving Gibbs samples to ", output_path
            cPickle.dump(results, f, protocol=-1)

    return results
Beispiel #27
0
def geweke_test():
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {
        'C': 1,
        'p': 0.5,
        'kappa': 3.0,
        'alpha': 3.0,
        'beta': 1.0 / 20.0
    }
    model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=1, dt=dt, dt_max=dt_max, network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in xrange(N_samples):
        if itr % 10 == 0:
            print "Iteration: ", itr
        # Resample the model
        model.resample_model()
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)

    # Compute sample statistics for second half of samples
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    c_samples = np.array([s.network.c for s in samples])
    p_samples = np.array([s.network.p for s in samples])
    v_samples = np.array([s.network.v for s in samples])
    lps = np.array(lps)

    offset = 0
    A_mean = A_samples[offset:, ...].mean(axis=0)
    W_mean = W_samples[offset:, ...].mean(axis=0)
    g_mean = g_samples[offset:, ...].mean(axis=0)
    lambda0_mean = lambda0_samples[offset:, ...].mean(axis=0)

    print "A mean:        ", A_mean
    print "W mean:        ", W_mean
    print "g mean:        ", g_mean
    print "lambda0 mean:  ", lambda0_mean

    # Plot the log probability over iterations
    plt.figure()
    plt.plot(np.arange(N_samples), lps)
    plt.xlabel("Iteration")
    plt.ylabel("Log probability")

    # Plot the histogram of bias samples
    plt.figure()
    p_lmbda0 = gamma(model.bias_model.alpha, scale=1. / model.bias_model.beta)
    _, bins, _ = plt.hist(lambda0_samples[:, 0],
                          bins=20,
                          alpha=0.5,
                          normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_lmbda0.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('lam0')
    plt.ylabel('p(lam0)')

    print "Expected p(A):  ", model.network.P
    print "Empirical p(A): ", A_samples.mean(axis=0)

    # Plot the histogram of weight samples
    plt.figure()
    Aeq1 = A_samples[:, 0, 0] == 1
    # p_W1 = gamma(model.network.kappa, scale=1./model.network.v[0,0])

    # The marginal distribution of W under a gamma prior on the scale
    # is a beta prime distribution
    p_W1 = betaprime(model.network.kappa,
                     model.network.alpha,
                     scale=model.network.beta)

    _, bins, _ = plt.hist(W_samples[Aeq1, 0, 0],
                          bins=20,
                          alpha=0.5,
                          normed=True)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    plt.plot(bincenters, p_W1.pdf(bincenters), 'r--', linewidth=1)
    plt.xlabel('W')
    plt.ylabel('p(W | A=1)')

    # Plot the histogram of impulse samples
    plt.figure()
    for b in range(model.B):
        plt.subplot(1, model.B, b + 1)
        a = model.impulse_model.gamma[b]
        b = model.impulse_model.gamma.sum() - a
        p_beta11b = beta(a, b)

        _, bins, _ = plt.hist(g_samples[:, 0, 0, b],
                              bins=20,
                              alpha=0.5,
                              normed=True)
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        plt.plot(bincenters, p_beta11b.pdf(bincenters), 'r--', linewidth=1)
        plt.xlabel('g_%d' % b)
        plt.ylabel('p(g_%d)' % b)

    # Plot the histogram of weight scale
    plt.figure()
    for c1 in range(model.C):
        for c2 in range(model.C):
            plt.subplot(model.C, model.C, 1 + c1 * model.C + c2)
            p_v = gamma(model.network.alpha, scale=1. / model.network.beta)

            _, bins, _ = plt.hist(v_samples[:, c1, c2],
                                  bins=20,
                                  alpha=0.5,
                                  normed=True)
            bincenters = 0.5 * (bins[1:] + bins[:-1])
            plt.plot(bincenters, p_v.pdf(bincenters), 'r--', linewidth=1)
            plt.xlabel('v_{%d,%d}' % (c1, c2))
            plt.ylabel('p(v)')

    plt.show()
def fit_network_hawkes_gibbs(S, K, C, dt, dt_max, output_path, standard_model=None):

    # Check for existing Gibbs results
    if os.path.exists(output_path + ".gibbs.pkl"):
        with open(output_path + ".gibbs.pkl", "r") as f:
            print "Loading Gibbs results from ", (output_path + ".gibbs.pkl")
            (samples, timestamps) = cPickle.load(f)

    else:
        print "Fitting the data with a network Hawkes model using Gibbs sampling"

        # Make a new model for inference
        # test_model = DiscreteTimeNetworkHawkesModelGammaMixture(C=C, K=K, dt=dt, dt_max=dt_max, B=B,
        #                                                         alpha=1.0, beta=1.0/20.0)
        test_basis = IdentityBasis(dt, dt_max, allow_instantaneous=True)

        # Set the network prior such that E[W] ~= 0.01
        # W ~ Gamma(kappa, v) for kappa = 1.25 => v ~ 125
        # v ~ Gamma(alpha, beta) for alpha = 10, beta = 10 / 125
        E_W = 0.01
        kappa = 10.0
        E_v = kappa / E_W
        alpha = 10.0
        beta = alpha / E_v
        network_hypers = {
            "C": 2,
            "kappa": kappa,
            "alpha": alpha,
            "beta": beta,
            "p": 0.8,
            "allow_self_connections": False,
        }
        test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=K, dt=dt, dt_max=dt_max, basis=test_basis, network_hypers=network_hypers
        )
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        plt.ion()
        im = plot_network(test_model.weight_model.A, test_model.weight_model.W, vmax=0.5)
        plt.pause(0.001)

        # Gibbs sample
        N_samples = 100
        samples = []
        lps = [test_model.log_probability()]
        timestamps = []
        for itr in xrange(N_samples):
            if itr % 1 == 0:
                print "Iteration ", itr, "\tLL: ", lps[-1]
                im.set_data(test_model.weight_model.W_effective)
                plt.pause(0.001)

            # lps.append(test_model.log_probability())
            lps.append(test_model.log_probability())
            samples.append(test_model.resample_and_copy())
            timestamps.append(time.clock())

            # Save this sample
            with open(output_path + ".gibbs.itr%04d.pkl" % itr, "w") as f:
                cPickle.dump(samples[-1], f, protocol=-1)

        # Save the Gibbs samples
        with open(output_path + ".gibbs.pkl", "w") as f:
            print "Saving Gibbs samples to ", (output_path + ".gibbs.pkl")
            cPickle.dump((samples, timestamps), f, protocol=-1)

    return samples, timestamps
Beispiel #29
0
def demo(seed=None):
    """
    Create a discrete time Hawkes model and generate from it.

    :return:
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print "Setting seed to ", seed
    np.random.seed(seed)

    C = 1
    K = 10
    T = 1000
    dt = 1.0
    B = 3

    # Create a true model
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1 - np.eye(C))
    # m = 0.5 * np.ones(C)
    c = (0.0 * (np.arange(K) < 10) + 1.0 * (np.arange(K) >= 10)).astype(np.int)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(C=C,
                                                            K=K,
                                                            dt=dt,
                                                            B=B,
                                                            c=c,
                                                            p=p,
                                                            v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T)

    # Make a new model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    test_model.add_data(S)

    # Plot the true and inferred firing rate
    kplt = 0
    plt.figure()
    plt.plot(np.arange(T), R[:, kplt], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate(ks=kplt), '-r')[0]
    plt.show()

    # Gradient descent
    N_steps = 10000
    lls = []
    for itr in xrange(N_steps):
        W, ll, grad = test_model.gradient_descent_step(stepsz=0.001)
        lls.append(ll)

        # Update plot
        if itr % 5 == 0:
            ln.set_data(np.arange(T), test_model.compute_rate(ks=kplt))
            plt.title("Iteration %d" % itr)
            plt.pause(0.001)

    plt.ioff()

    print "W true:        ", true_model.weight_model.A * true_model.weight_model.W
    print "lambda0 true:  ", true_model.bias_model.lambda0
    print "ll true:       ", true_model.log_likelihood()
    print ""
    print "W test:        ", test_model.W
    print "lambda0 test   ", test_model.bias
    print "ll test:       ", test_model.log_likelihood()

    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K, K)), test_model.W, vmax=0.5)
    plt.show()
def main(args):
    """Fit several network Hawkes chains to per-group daily event counts.

    Steps, in order:

    1. Create ``args.savedir`` if it does not exist.
    2. Load the events in ``args.datafile`` and build a table with one row
       per day from 2001-01-01 to 2005-12-31 and one event-count column per
       ``gname``.
    3. Write that table to ``../../data/<country>_multihawkes_data.csv`` and
       read it back.
    4. Build ``args.num_chains`` spike-and-slab Hawkes models on the counts
       and resample all of them until, for every group, the value returned
       by ``calcR`` for the ``lambda0`` traces is within ``args.tol`` of 1.
       The check only starts after 1000 sweeps and then runs on sweeps whose
       count is a multiple of ``args.thin``.
    5. Write ``<savedir>/<country>_srf.csv`` (B, W, V, R per group) and
       ``<savedir>/<country>_multihawkes.json`` (final state of chain 0).

    :param args: namespace with ``savedir``, ``datafile``, ``num_chains``,
        ``burn``, ``thin`` and ``tol`` attributes.
    :raises ValueError: if ``args.num_chains`` is smaller than 1.
    :return: None
    """
    if args.num_chains < 1:
        raise ValueError('args.num_chains must be at least 1')

    # Create the output directory without going through a shell: the path is
    # never interpreted as shell text, and an existing directory is not an
    # error.
    os.makedirs(args.savedir, exist_ok=True)

    # Get the country: the part of the data file's name before the first '_'
    country = os.path.basename(args.datafile).split('_')[0]
    merged_csv = '../../data/%s_multihawkes_data.csv' % country

    # Load the data
    df = loaders.load_country_data(args.datafile, index_col=False)

    # One row per calendar day; each group's counts are merged onto it
    date_ordinals = pd.DataFrame(pd.date_range('2001-01-01',
                                               '2005-12-31').values,
                                 columns=['date'])

    # Convert each group to the date range
    print('generate the groups to the dates')
    gnames = []
    date_grouped = df.groupby(['gname', 'date']).agg({
        'eventid': 'count'
    }).reset_index()
    for group, groupdf in date_grouped.groupby('gname'):
        gnames.append(group)
        # The count column is renamed after the group so that every group
        # ends up in its own column of the merged table.
        rgdf = groupdf.rename(columns={'eventid': group})
        date_ordinals = date_ordinals.merge(rgdf.loc[:, ['date', group]],
                                            on='date',
                                            how='left')

    # Persist the merged table, then read it back in
    date_ordinals.to_csv(merged_csv)
    date_ordinals = pd.read_csv(merged_csv, index_col=0)
    # Days on which a group has no events come out of the left merge as NaN
    date_ordinals.fillna(0, inplace=True)
    date_ordinals.set_index('date', inplace=True)
    date_ordinals = date_ordinals.astype(int)

    # Parameter setting
    K = len(date_ordinals.columns)
    # NOTE(review): dt_max is set to the number of rows (days) in the table;
    # confirm that this is the intended influence window.
    dt_max = len(date_ordinals)
    p = 0.25
    network_hypers = {"p": p, "allow_self_connections": True}
    counts = np.array(date_ordinals.values.tolist())

    # One independent model per chain, each with its own copy of the data
    hawkes_models = {}
    for ichain in range(args.num_chains):
        hawkes_models[ichain] = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=K, dt_max=dt_max, network_hypers=network_hypers)
        hawkes_models[ichain].add_data(counts.copy())

    # Per-chain, per-group lambda0 traces and their summary statistics
    parameter_trace = {
        ichain: {g: [] for g in gnames}
        for ichain in range(args.num_chains)
    }
    trace_stats = {
        ichain: {g: {'mean': 0, 'std': 0} for g in gnames}
        for ichain in range(args.num_chains)
    }

    srfpass = False
    loopcount = 0
    while not srfpass:
        # Resample all chains and record lambda0 of the chain that was just
        # resampled.
        for ichain in range(args.num_chains):
            chain = hawkes_models[ichain]
            chain.resample_model()
            for i, group in enumerate(gnames):
                parameter_trace[ichain][group].append(chain.lambda0[i])
        print(loopcount)
        loopcount += 1

        # Start checking
        if loopcount > 1000 and loopcount % args.thin == 0:
            # The summary statistics are only consumed here, so they are
            # refreshed here rather than after every single sweep.
            for ichain in range(args.num_chains):
                for group in gnames:
                    kept = parameter_trace[ichain][group][
                        args.burn::args.thin]
                    trace_stats[ichain][group]['mean'] = np.mean(kept)
                    trace_stats[ichain][group]['std'] = np.std(kept)

            B = calcB(trace_stats, gnames, args.num_chains)
            W = calcW(trace_stats, gnames, args.num_chains)
            VarSig = calcVar(W, B, args.num_chains)
            R = calcR(VarSig, W)

            # Every parameter has to be within tolerance; a NaN never is.
            srfpass = all(
                abs(srf_val - 1.0) < args.tol for srf_val in R.values())

    # Write out the SRFs
    with open('%s/%s_srf.csv' % (args.savedir, country), 'w') as wfile:
        print('group,B,W,V,R', file=wfile)
        for gname in B:
            print('%s,%f,%f,%f,%f' %
                  (gname, B[gname], W[gname], VarSig[gname], R[gname]),
                  file=wfile)

    # Pull the fitted parameters from the final state of the first chain.
    # NOTE(review): chain 0 is an arbitrary choice among the converged
    # chains; confirm whether a different summary is wanted.
    fitted = hawkes_models[0]
    dataset = {}
    for i, group in enumerate(gnames):
        dataset[group] = {
            'B': float(fitted.B),
            'W': fitted.W_effective[i].tolist(),
            'lambda': float(fitted.lambda0[i])
        }
    with open('%s/%s_multihawkes.json' % (args.savedir, country),
              'w') as jfile:
        json.dump(dataset, jfile, indent=4)
Beispiel #31
0
def test_gibbs_sbm(seed=None):
    """
    Resample only the network of a spike-and-slab Hawkes model and score the
    sampled ``c`` vectors against those of a reference model.

    Two models are built from identical network hyperparameters.  No data is
    generated or attached; on every iteration the reference ("true") model's
    adjacency matrix ``A`` and weights ``W`` are passed to
    ``test_model.network.resample``.  The ``c`` entry of each recorded
    parameter sample is compared with the true model's ``network.c`` using
    the adjusted mutual information and adjusted Rand scores, and both
    curves are plotted against the iteration number.

    :param seed: seed for ``np.random``; drawn at random when ``None``
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)

    # print is always called with one pre-formatted string: that form parses
    # on both Python 2 and Python 3 and emits the same text as the old
    # two-item print statement (label, space, value).
    print("%s %s" % ("Setting seed to ", seed))
    np.random.seed(seed)

    C = 10
    K = 100
    T = 1000
    dt = 1.0
    B = 3

    # Reference model (data generation is disabled in this test)
    network_hypers = {'C': C, 'beta': 1.0 / K}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network_hypers=network_hypers)
    # S,R = true_model.generate(T=T)
    true_c = true_model.network.c
    perm = np.argsort(true_c)

    # Plot the true network with rows/columns sorted by c
    plt.ion()
    plot_network(true_model.weight_model.A[np.ix_(perm, perm)],
                 true_model.weight_model.W[np.ix_(perm, perm)])
    plt.pause(0.001)

    # Make a new model for inference
    network_hypers = {'C': C, 'beta': 1.0 / K}
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network_hypers=network_hypers)
    # test_model.add_data(S)

    # Gibbs sample
    N_samples = 10
    samples = []
    lps = []
    for itr in range(N_samples):
        if itr % 5 == 0:
            print("%s %s" % ("Iteration: ", itr))
        # The state is recorded before it is resampled, so the first entry
        # is the initial state.
        samples.append(copy.deepcopy(test_model.get_parameters()))

        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample(
            (true_model.weight_model.A, true_model.weight_model.W))

    plt.ioff()

    # Pull the c vector (fifth of the eight parameters) out of every sample
    c_samples = np.array(
        [c_sample for _, _, _, _, c_sample, _, _, _ in samples])

    print("%s %s" % ("True c: ", true_model.network.c))
    print("%s %s" % ("Test c: ", c_samples[-10:, :]))

    # Compute the clustering scores of every sample against the true c
    amis = []
    arss = []
    for c_sample in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c_sample))
        arss.append(adjusted_rand_score(true_model.network.c, c_sample))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()
Beispiel #32
0
def demo(seed=None):
    """
    Placeholder for an out-of-date example: calling it always raises.

    Everything after the ``raise`` is unreachable.  It is kept as the
    starting point for the eventual update and outlines the intended flow:
    build a spike-and-slab model, generate data from it, fit a standard
    Hawkes model by gradient descent while redrawing the inferred rate, then
    print and plot the results.  The reference code is written so that it
    parses on both Python 2 and Python 3.

    :param seed: seed for ``np.random``; not read while the raise is in place
    :raises NotImplementedError: always
    :return: never returns
    """
    raise NotImplementedError("This example needs to be updated.")

    # ------------------------------------------------------------------
    # Unreachable reference code below this line.
    # ------------------------------------------------------------------
    if seed is None:
        seed = np.random.randint(2**32)

    # print is called with one pre-formatted string so that the statement
    # means the same thing on Python 2 and Python 3.
    print("%s %s" % ("Setting seed to ", seed))
    np.random.seed(seed)

    C = 1
    K = 10
    T = 1000
    dt = 1.0
    B = 3

    # Create a true model
    p = 0.8 * np.eye(C)
    v = 10.0 * np.eye(C) + 20.0 * (1 - np.eye(C))
    # m = 0.5 * np.ones(C)
    # The builtin int replaces the np.int alias, which newer NumPy releases
    # no longer provide.
    c = (0.0 * (np.arange(K) < 10) + 1.0 * (np.arange(K) >= 10)).astype(int)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        C=C, K=K, dt=dt, B=B, c=c, p=p, v=v)

    # Plot the true network
    plt.ion()
    plot_network(true_model.weight_model.A,
                 true_model.weight_model.W,
                 vmax=0.5)
    plt.pause(0.001)

    # Sample from the true model
    S, R = true_model.generate(T=T)

    # Make a new model for inference
    test_model = DiscreteTimeStandardHawkesModel(K=K, dt=dt, B=B, beta=1.0)
    test_model.add_data(S)

    # Plot the true and inferred firing rate
    kplt = 0
    plt.figure()
    plt.plot(np.arange(T), R[:, kplt], '-k', lw=2)
    plt.ion()
    ln = plt.plot(np.arange(T), test_model.compute_rate(ks=kplt), '-r')[0]
    plt.show()

    # Gradient descent
    N_steps = 10000
    lls = []
    for itr in range(N_steps):
        _, ll, _ = test_model.gradient_descent_step(stepsz=0.001)
        lls.append(ll)

        # Update plot
        if itr % 5 == 0:
            ln.set_data(np.arange(T), test_model.compute_rate(ks=kplt))
            plt.title("Iteration %d" % itr)
            plt.pause(0.001)

    plt.ioff()

    print("%s %s" % ("W true:        ",
                     true_model.weight_model.A * true_model.weight_model.W))
    print("%s %s" % ("lambda0 true:  ", true_model.bias_model.lambda0))
    print("%s %s" % ("ll true:       ", true_model.log_likelihood()))
    print("")
    print("%s %s" % ("W test:        ", test_model.W))
    print("%s %s" % ("lambda0 test   ", test_model.bias))
    print("%s %s" % ("ll test:       ", test_model.log_likelihood()))

    plt.figure()
    plt.plot(np.arange(N_steps), lls)
    plt.xlabel("Iteration")
    plt.ylabel("Log likelihood")

    plot_network(np.ones((K, K)), test_model.W, vmax=0.5)
    plt.show()
Beispiel #33
0
def demo(K=3, T=1000, dt_max=20, p=0.25):
    """
    Generate data from a small hand-wired Hawkes network, then fit a second
    spike-and-slab model to it with Gibbs sampling, saving a figure of the
    fit after every sweep under ``gifs/``.

    The true network has exactly two connections, 0 -> 1 and 0 -> 2, each
    with weight 1.0, and node 0 has background rate 0.2.

    :param K:       Number of nodes
    :param T:       Number of time bins to simulate
    :param dt_max:  Number of future time bins an event can influence
    :param p:       Sparsity of network
    :return:        None
    """
    # Imported locally so the snippet does not depend on a module-level import
    import os

    # Every figure is written below gifs/; make sure the directory exists
    # before the (slow) simulation and sampling start.
    if not os.path.isdir("gifs"):
        os.makedirs("gifs")

    ###########################################################
    # Generate synthetic data
    ###########################################################
    network = ErdosRenyiFixedSparsity(K, p, v=1., allow_self_connections=False)
    bkgd_hypers = {"alpha": 1.0, "beta": 20.0}
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt_max=dt_max, bkgd_hypers=bkgd_hypers, network=network)

    # Overwrite the sampled network with the hand-wired one
    A_true = np.zeros((K, K))
    A_true[0, 1] = A_true[0, 2] = 1
    W_true = np.zeros((K, K))
    W_true[0, 1] = W_true[0, 2] = 1.0
    true_model.weight_model.A = A_true
    true_model.weight_model.W = W_true
    true_model.bias_model.lambda0[0] = 0.2
    assert true_model.check_stability()

    # Sample from the true model
    S, R = true_model.generate(T=T, keep=True, print_interval=50)

    plt.ion()
    true_figure, _ = true_model.plot(color="#377eb8", T_slice=(0, 100))

    # Save the true figure
    true_figure.savefig("gifs/true.gif")

    ###########################################################
    # Create a test spike and slab model
    ###########################################################
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K,
                                                            dt_max=dt_max,
                                                            network=network)

    test_model.add_data(S)

    # Initialize plots
    test_figure, test_handles = test_model.plot(color="#e41a1c",
                                                T_slice=(0, 100))
    test_figure.savefig("gifs/test0.gif")

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 100
    samples = []
    lps = []
    for itr in range(N_samples):
        # One pre-formatted string keeps this valid, with identical output,
        # on both Python 2 and Python 3.
        print("%s %s" % ("Gibbs iteration ", itr))
        test_model.resample_model()
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())

        # Update plots; frame 0 is the initial state, so sweeps start at 1
        test_model.plot(handles=test_handles)
        test_figure.savefig("gifs/test%d.gif" % (itr + 1))
Beispiel #34
0
def test_gibbs_sbm(seed=None):
    """
    Resample only the stochastic block model of a test Hawkes model and score
    the sampled ``c`` vectors against the true ones.

    A true model is built on a ``StochasticBlockModel`` with a fixed ``c``
    (``K`` nodes split into ``C`` equal consecutive runs) and random
    connection probabilities; data is generated from it and attached to a
    test model with its own block model.  On every iteration the true
    model's adjacency matrix ``A`` and weights ``W`` are passed to
    ``test_model.network.resample``.  Each recorded ``c`` is compared with
    the true ``c`` using the adjusted mutual information and adjusted Rand
    scores, and both curves are plotted against the iteration number.

    :param seed: seed for ``np.random``; drawn at random when ``None``
    :return: None
    """
    if seed is None:
        seed = np.random.randint(2**32)

    print("Setting seed to ", seed)
    np.random.seed(seed)

    C = 2
    K = 100
    # np.ceil returns a float, and repeat() refuses a float count, so the
    # per-block size is converted to int before it is used.
    nodes_per_block = int(np.ceil(K / float(C)))
    c = np.arange(C).repeat(nodes_per_block)[:K]
    T = 1000
    dt = 1.0
    B = 3

    # Generate from a true model
    true_p = np.random.rand(C, C) * 0.25
    true_network = StochasticBlockModel(K, C, c=c, p=true_p, v=10.0)
    true_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network=true_network)

    S, R = true_model.generate(T)

    # Plot the true network
    plt.ion()
    true_im = true_model.plot_adjacency_matrix()
    plt.pause(0.001)

    # Make a new model for inference
    test_network = StochasticBlockModel(K, C, beta=1. / K)
    test_model = DiscreteTimeNetworkHawkesModelSpikeAndSlab(
        K=K, dt=dt, B=B, network=test_network)
    test_model.add_data(S)

    # Gibbs sample
    N_samples = 100
    c_samples = []
    lps = []
    for itr in progprint_xrange(N_samples):
        # The state is recorded before it is resampled, so the first entry
        # is the initial state.
        c_samples.append(test_network.c.copy())
        lps.append(test_model.log_probability())

        # Resample the network only
        test_model.network.resample((true_model.weight_model.A,
                                     true_model.weight_model.W))

    c_samples = np.array(c_samples)
    plt.ioff()

    # Show the true c next to the last ten sampled ones
    print("True c: ", true_model.network.c)
    print("Test c: ", c_samples[-10:, :])

    # Compute the clustering scores of every sample against the true c
    amis = []
    arss = []
    for c_sample in c_samples:
        amis.append(adjusted_mutual_info_score(true_model.network.c, c_sample))
        arss.append(adjusted_rand_score(true_model.network.c, c_sample))

    plt.figure()
    plt.plot(np.arange(N_samples), amis, '-r')
    plt.plot(np.arange(N_samples), arss, '-b')
    plt.xlabel("Iteration")
    plt.ylabel("Clustering score")
    plt.show()