def generate_synth_data(V=10, D=10, T=5, N=100, alpha_beta=10., alpha_theta=10.,
                        plot=False, train_frac=0.5):
    """
    Sample a synthetic corpus from a correlated LDA model and split each
    document's word counts into a train/test pair.

    :param V: vocabulary size
    :param D: number of documents
    :param T: number of topics
    :param N: words per document
    :param alpha_beta: topic-word concentration hyperparameter
    :param alpha_theta: unused here; kept for interface compatibility with
        the commented-out StandardLDA alternative below
    :param plot: if True, show a heatmap of the sampled word-count matrix
    :param train_frac: probability that each word token lands in the train split
    :return: (true_lda, train_data, test_data) where the two data arrays are
        (D, V) count matrices that sum elementwise to the full corpus
    """
    # true_lda = StandardLDA(T, V, alpha_beta=alpha_beta, alpha_theta=alpha_theta)
    true_lda = LogisticNormalCorrelatedLDA(T, V, alpha_beta=alpha_beta)
    print("Sigma: ", true_lda.Sigma)
    # true_lda = StickbreakingCorrelatedLDA(T, V, alpha_beta=alpha_beta)

    data = np.zeros((D, V), dtype=int)
    # FIX: xrange is Python 2 only; this function already uses Python 3 print()
    for d in range(D):
        doc = true_lda.generate(N=N, keep=True)
        data[d, :] = doc.w

    if plot:
        plt.figure()
        plt.imshow(data, interpolation="none")
        plt.xlabel("Vocabulary")
        plt.ylabel("Documents")
        plt.colorbar()
        plt.show()

    # Split each document into two
    train_data = np.zeros_like(data)
    test_data = np.zeros_like(data)
    for d, w in enumerate(data):
        # Get vector where i is repeated w[i] times
        wcnt = ibincount(w)

        # Subsample wcnt: each token independently goes to train w.p. train_frac
        train_inds = np.random.rand(wcnt.size) < train_frac
        train_data[d] = np.bincount(wcnt[train_inds], minlength=V)
        test_data[d] = np.bincount(wcnt[~train_inds], minlength=V)
        assert np.allclose(train_data[d] + test_data[d], w)

    return true_lda, train_data, test_data
def generate_dataset(bias=1.):
    """
    Sample a spike train from a discrete-time Hawkes model and convert it
    to a continuous-time event sequence.

    :param bias: scales the background-rate prior (beta = 3 / bias), so
        larger bias yields a higher expected background rate
    :return: (S_dt, S_ct, C_ct) — the (T/dt, K) discrete count matrix,
        sorted continuous event times, and their process labels
    """
    # Create the model with these parameters (single-block network, C=1)
    network_hypers = {
        'C': 1,
        'kappa': 1.,
        # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24
        'c': np.zeros(K, dtype=int),
        'p': 1 * np.ones((1, 1)),
        'v': 100.,
    }
    bkgd_hypers = {"alpha": 3., "beta": 3. / bias}
    dt_model = pyhawkes.models.\
        DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=K, dt=dt, dt_max=dt_max, B=B,
            bkgd_hypers=bkgd_hypers,
            network_hypers=network_hypers)
    # dt_model.bias_model.lambda0 = bias * np.ones(K)
    assert dt_model.check_stability()

    S_dt, _ = dt_model.generate(T=int(np.ceil(T / dt)), keep=False)
    # FIX: Python 2 print statement -> Python 3 print() (rest of file uses print())
    print("sampled dataset with ", S_dt.sum(), "events")

    # Convert S_dt to continuous time: each count in bin t becomes an event
    # at t*dt plus a uniform jitter within the bin
    S_ct = dt * np.concatenate([ibincount(S) for S in S_dt.T]).astype(float)
    S_ct += dt * np.random.rand(*S_ct.shape)
    assert np.all(S_ct < T)
    C_ct = np.concatenate([k * np.ones(S.sum())
                           for k, S in enumerate(S_dt.T)]).astype(int)

    # Sort the data by event time
    perm = np.argsort(S_ct)
    S_ct = S_ct[perm]
    C_ct = C_ct[perm]

    return S_dt, S_ct, C_ct
def generate_dataset(bias=1.):
    """
    Sample a spike train from a discrete-time Hawkes model and convert it
    to a continuous-time event sequence.

    :param bias: scales the background-rate prior (beta = 3 / bias)
    :return: (S_dt, S_ct, C_ct) — discrete count matrix, sorted continuous
        event times, and their process labels
    """
    # Create the model with these parameters
    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24
    network_hypers = {'C': 1, 'kappa': 1., 'c': np.zeros(K, dtype=int),
                      'p': 1*np.ones((1,1)), 'v': 100.}
    bkgd_hypers = {"alpha": 3., "beta": 3./bias}
    dt_model = pyhawkes.models.\
        DiscreteTimeNetworkHawkesModelSpikeAndSlab(
            K=K, dt=dt, dt_max=dt_max, B=B,
            bkgd_hypers=bkgd_hypers,
            network_hypers=network_hypers)
    # dt_model.bias_model.lambda0 = bias * np.ones(K)
    assert dt_model.check_stability()

    S_dt,_ = dt_model.generate(T=int(np.ceil(T/dt)), keep=False)
    # FIX: Python 2 print statement -> Python 3 print() (rest of file uses print())
    print("sampled dataset with ", S_dt.sum(), "events")

    # Convert S_dt to continuous time: each count in bin t becomes an event
    # at t*dt plus a uniform jitter within the bin
    S_ct = dt * np.concatenate([ibincount(S) for S in S_dt.T]).astype(float)
    S_ct += dt * np.random.rand(*S_ct.shape)
    assert np.all(S_ct < T)
    C_ct = np.concatenate([k*np.ones(S.sum())
                           for k,S in enumerate(S_dt.T)]).astype(int)

    # Sort the data by event time
    perm = np.argsort(S_ct)
    S_ct = S_ct[perm]
    C_ct = C_ct[perm]

    return S_dt, S_ct, C_ct
def convert_discrete_to_continuous(S, dt):
    """
    Turn a discrete-time count array S (bins x processes) into a
    continuous-time event sequence.

    Each unit of count in bin t of process k becomes one event, placed at
    the left edge of bin t and jittered uniformly within the bin.

    :param S: integer count array of shape (num_bins, num_processes)
    :param dt: bin width
    :return: (S_ct, C_ct, T) — event times sorted ascending, matching
        process labels, and the total duration num_bins * dt
    """
    # Convert S to continuous time
    from pybasicbayes.util.general import ibincount

    num_bins = S.shape[0]
    T = num_bins * dt

    # Expand each process column into per-event bin indices
    bin_indices = [ibincount(col) for col in S.T]
    S_ct = np.concatenate(bin_indices).astype(float) * dt
    S_ct = S_ct + dt * np.random.rand(*S_ct.shape)
    assert np.all(S_ct < T)

    # Label each event with the index of the process it came from
    labels = [np.ones(col.sum()) * k for k, col in enumerate(S.T)]
    C_ct = np.concatenate(labels).astype(int)

    # Sort the data by event time
    order = np.argsort(S_ct)
    return S_ct[order], C_ct[order], T
def downsample_data(X, n):
    """
    Downsample each row of X such that it sums to n by randomly removing
    entries (count units) without replacement.

    :param X: (D, K) array of nonnegative integer counts; every row must
        sum to at least n
    :param n: target row sum after downsampling
    :return: (D, K) float array whose rows each sum to n
    """
    assert X.ndim == 2
    D, K = X.shape
    # FIX: np.int / np.float were deprecated in NumPy 1.20 and removed in 1.24
    Xsub = X.copy().astype(int)
    # FIX: xrange is Python 2 only (the other copy of this function uses range)
    for d in range(D):
        # Expand counts into a vector where index i is repeated Xsub[d, i]
        # times (inlines pybasicbayes' ibincount, removing the dependency)
        xi = np.repeat(np.arange(K), Xsub[d])
        # Keep n of the expanded units uniformly at random, then re-bin
        Xsub[d] = np.bincount(np.random.choice(xi, size=n, replace=False),
                              minlength=K)
        assert Xsub[d].sum() == n
    return Xsub.astype(float)
def downsample_data(X, n):
    """
    Downsample each row of X such that it sums to n by randomly removing
    entries (count units) without replacement.

    :param X: (D, K) array of nonnegative integer counts; every row must
        sum to at least n
    :param n: target row sum after downsampling
    :return: (D, K) float array whose rows each sum to n
    """
    assert X.ndim == 2
    D, K = X.shape
    # FIX: np.int / np.float were deprecated in NumPy 1.20 and removed in 1.24
    Xsub = X.copy().astype(int)
    for d in range(D):
        # Expand counts into a vector where index i is repeated Xsub[d, i]
        # times (inlines pybasicbayes' ibincount, removing the dependency)
        xi = np.repeat(np.arange(K), Xsub[d])
        # Keep n of the expanded units uniformly at random, then re-bin
        Xsub[d] = np.bincount(np.random.choice(xi, size=n, replace=False),
                              minlength=K)
        assert Xsub[d].sum() == n
    return Xsub.astype(float)
# Sample from a discrete-time Hawkes model, then convert the counts to a
# continuous-time event sequence and load it into a continuous-time model.
dt_max = 10.
T = 100.
network_hypers = {'kappa': 1., 'p': 1., 'v': 10.}
# NOTE(review): relies on module-level K, dt, B defined elsewhere in the file
dt_model = pyhawkes.models.\
    DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, B=B,
                                               network_hypers=network_hypers)
assert dt_model.check_stability()

S_dt, _ = dt_model.generate(T=int(np.ceil(T / dt)), keep=False)
print("sampled dataset with ", S_dt.sum(), "events")
print("DT LL: ", dt_model.heldout_log_likelihood(S_dt))

# Convert S_test to continuous time: each count in bin t becomes an event at
# t*dt plus a uniform jitter inside the bin, labeled by its process index k
S_ct = dt * np.concatenate([ibincount(S) for S in S_dt.T]).astype(float)
S_ct += dt * np.random.rand(*S_ct.shape)
assert np.all(S_ct < T)
C_ct = np.concatenate([k * np.ones(S.sum())
                       for k, S in enumerate(S_dt.T)]).astype(int)

# Sort the data by event time so times and labels stay aligned
perm = np.argsort(S_ct)
S_ct = S_ct[perm]
C_ct = C_ct[perm]

# NOTE(review): ct_model uses dt_max=1. while dt_model was built with
# dt_max=10. — confirm the mismatch is intentional
ct_model = pyhawkes.models.ContinuousTimeNetworkHawkesModel(
    K, dt_max=1., network_hypers=network_hypers)
ct_model.add_data(S_ct, C_ct, T)
# ct.resample_model()
# Sample from a discrete-time Hawkes model, then convert the counts to a
# continuous-time event sequence and load it into a continuous-time model.
dt_max = 10.
T = 100.
network_hypers = {'kappa': 1., 'p': 1., 'v': 10.}
# NOTE(review): relies on module-level K, dt, B defined elsewhere in the file
dt_model = pyhawkes.models.\
    DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, B=B,
                                               network_hypers=network_hypers)
assert dt_model.check_stability()

S_dt,_ = dt_model.generate(T=int(np.ceil(T/dt)), keep=False)
print("sampled dataset with ", S_dt.sum(), "events")
print("DT LL: ", dt_model.heldout_log_likelihood(S_dt))

# Convert S_test to continuous time: each count in bin t becomes an event at
# t*dt plus a uniform jitter inside the bin, labeled by its process index k
S_ct = dt * np.concatenate([ibincount(S) for S in S_dt.T]).astype(float)
S_ct += dt * np.random.rand(*S_ct.shape)
assert np.all(S_ct < T)
C_ct = np.concatenate([k*np.ones(S.sum())
                       for k,S in enumerate(S_dt.T)]).astype(int)

# Sort the data by event time so times and labels stay aligned
perm = np.argsort(S_ct)
S_ct = S_ct[perm]
C_ct = C_ct[perm]

# NOTE(review): ct_model uses dt_max=1. while dt_model was built with
# dt_max=10. — confirm the mismatch is intentional
ct_model = pyhawkes.models.ContinuousTimeNetworkHawkesModel(K, dt_max=1.,
                                                            network_hypers=network_hypers)
ct_model.add_data(S_ct, C_ct, T)
# ct.resample_model()
# Hard code parameters