def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)
    # Randomise input covariance structure if needed
    if Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample
    j_ind = np.empty(N, dtype=np.int64)
    for j in xrange(J):
        j_ind[j_lim[j]:j_lim[j+1]] = j

    # Assign parameters
    if SIGMA is None:
        sigma = np.exp(rnd_data.randn()*SIGMA_H)
    else:
        sigma = SIGMA
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
    else:
        sigma_a = SIGMA_A
    if BETA is None:
        beta = rnd_data.randn(D)*SIGMA_B
    else:
        beta = BETA

    # Regulate beta
    beta_sum = np.sum(beta)
    while np.abs(beta_sum) < B_ABS_MIN_SUM:
        # Replace one random element in beta
        index = rnd_data.randint(D)
        beta_sum -= beta[index]
        beta[index] = rnd_data.randn()*SIGMA_B
        beta_sum += beta[index]

    alpha_j = rnd_data.randn(J)*sigma_a
    phi_true = np.empty(self.dphi)
    phi_true[0] = np.log(sigma)
    phi_true[1] = np.log(sigma_a)
    phi_true[2:] = beta

    # Determine suitable sigma_x
    sigma_x = calc_input_param_lin_reg(beta, sigma, Sigma_x)

    # Simulate data
    if Sigma_x is None:
        X = rnd_data.randn(N, D)*sigma_x
    else:
        cho_x = cholesky(Sigma_x)
        X = rnd_data.randn(N, D).dot(sigma_x*cho_x)
    y_true = alpha_j[j_ind] + X.dot(beta)
    y = y_true + rnd_data.randn(N)*sigma

    return data(
        X, y, {'sigma_x': sigma_x, 'Sigma_x': Sigma_x}, y_true,
        Nj, j_lim, j_ind,
        {'phi': phi_true, 'alpha': alpha_j, 'beta': beta, 'sigma': sigma}
    )
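
# Usage sketch (illustrative, not part of the module). The same calling
# pattern applies to the hierarchical and classification variants below.
# `Model` is a hypothetical stand-in for whichever class defines this
# method; its constructor arguments are assumptions mirroring the
# attributes the method reads (self.J, self.D, self.npg).

import numpy as np

model = Model(J=10, D=3, npg=(20, 60))  # hypothetical class; Nj ~ uniform{20,...,60}

# Identity input covariance (the default), fixed seed for reproducibility
d1 = model.simulate_data(seed=0)
# Randomised correlation structure via common.rand_corr_vine
d2 = model.simulate_data(Sigma_x='rand', seed=0)
# Explicit covariance structure given as an ndarray
S = 0.5*np.eye(3) + 0.5                 # unit diagonal, 0.5 off-diagonal
d3 = model.simulate_data(Sigma_x=S, seed=0)

# Because seed_input_cov is drawn unconditionally, all three calls consume
# the random stream identically up to the data draw, so they share the
# same group sizes Nj and the same true parameters.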
def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)
    # Randomise input covariance structure if needed
    if Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample
    j_ind = np.empty(N, dtype=np.int64)
    for j in xrange(J):
        j_ind[j_lim[j]:j_lim[j+1]] = j

    # Assign parameters
    if SIGMA is None:
        sigma = np.exp(rnd_data.randn()*SIGMA_H)
    else:
        sigma = SIGMA
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_SA)
    else:
        sigma_a = SIGMA_A
    if MU_A is None:
        mu_a = rnd_data.randn()*SIGMA_MA
    else:
        mu_a = MU_A
    sigma_b = np.exp(rnd_data.randn(D)*SIGMA_SB)
    mu_b = rnd_data.randn(D)*SIGMA_MB
    alpha_j = mu_a + rnd_data.randn(J)*sigma_a
    beta_j = mu_b + rnd_data.randn(J, D)*sigma_b

    # Regulate beta
    for j in xrange(J):
        beta_sum = np.sum(beta_j[j])
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta_j[j, index]
            beta_j[j, index] = mu_b[index] + rnd_data.randn()*sigma_b[index]
            beta_sum += beta_j[j, index]

    phi_true = np.empty(self.dphi)
    phi_true[0] = np.log(sigma)
    phi_true[1] = mu_a
    phi_true[2] = np.log(sigma_a)
    phi_true[3:3+D] = mu_b
    phi_true[3+D:] = np.log(sigma_b)

    # Determine suitable sigma_x
    sigma_x_j = calc_input_param_lin_reg(beta_j, sigma, Sigma_x)

    # Simulate data
    # Different sigma_x for every group
    X = np.empty((N, D))
    if Sigma_x is None:
        for j in xrange(J):
            X[j_lim[j]:j_lim[j+1], :] = rnd_data.randn(Nj[j], D)*sigma_x_j[j]
    else:
        cho_x = cholesky(Sigma_x)
        for j in xrange(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                rnd_data.randn(Nj[j], D).dot(sigma_x_j[j]*cho_x)
    y_true = np.empty(N)
    for n in xrange(N):
        y_true[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
    y = y_true + rnd_data.randn(N)*sigma

    return data(
        X, y, {'sigma_x': sigma_x_j, 'Sigma_x': Sigma_x}, y_true,
        Nj, j_lim, j_ind,
        {'phi': phi_true, 'alpha': alpha_j, 'beta': beta_j, 'sigma': sigma}
    )
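
# For reference, the packed parameter vector phi_true built by this
# variant has the layout [log sigma, mu_a, log sigma_a, mu_b (D entries),
# log sigma_b (D entries)], which implies self.dphi == 3 + 2*D. A minimal
# unpacking sketch under that assumed layout; unpack_phi is an
# illustrative helper, not part of the module.

import numpy as np

def unpack_phi(phi, D):
    # Assumed layout: [log sigma, mu_a, log sigma_a, mu_b (D), log sigma_b (D)]
    sigma = np.exp(phi[0])
    mu_a = phi[1]
    sigma_a = np.exp(phi[2])
    mu_b = phi[3:3+D]
    sigma_b = np.exp(phi[3+D:])
    return sigma, mu_a, sigma_a, mu_b, sigma_b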
def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)
    # Randomise input covariance structure if needed
    if Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample
    j_ind = np.empty(N, dtype=np.int64)
    for j in xrange(J):
        j_ind[j_lim[j]:j_lim[j+1]] = j

    # Assign parameters
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_SA)
    else:
        sigma_a = SIGMA_A
    if MU_A is None:
        mu_a = rnd_data.randn()*SIGMA_MA
    else:
        mu_a = MU_A
    sigma_b = np.exp(rnd_data.randn(D)*SIGMA_SB)
    mu_b = rnd_data.randn(D)*SIGMA_MB
    alpha_j = mu_a + rnd_data.randn(J)*sigma_a
    beta_j = mu_b + rnd_data.randn(J, D)*sigma_b

    # Regulate beta
    for j in xrange(J):
        beta_sum = np.sum(beta_j[j])
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta_j[j, index]
            beta_j[j, index] = mu_b[index] + rnd_data.randn()*sigma_b[index]
            beta_sum += beta_j[j, index]

    phi_true = np.empty(self.dphi)
    phi_true[0] = mu_a
    phi_true[1] = np.log(sigma_a)
    phi_true[2:2+D] = mu_b
    phi_true[2+D:] = np.log(sigma_b)

    # Determine suitable mu_x and sigma_x
    mu_x_j, sigma_x_j = calc_input_param_classification(
        alpha_j, beta_j, Sigma_x
    )

    # Simulate data
    # Different mu_x and sigma_x for every group
    X = np.empty((N, D))
    if Sigma_x is None:
        for j in xrange(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D)*sigma_x_j[j]
    else:
        cho_x = cholesky(Sigma_x)
        for j in xrange(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D).dot(sigma_x_j[j]*cho_x)
    y = np.empty(N)
    for n in xrange(N):
        y[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
    y = 1/(1+np.exp(-y))
    y_true = (0.5 < y).astype(int)
    y = (rnd_data.rand(N) < y).astype(int)

    return data(
        X, y, {'mu_x': mu_x_j, 'sigma_x': sigma_x_j, 'Sigma_x': Sigma_x},
        y_true, Nj, j_lim, j_ind,
        {'phi': phi_true, 'alpha': alpha_j, 'beta': beta_j}
    )
def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)
    # Randomise input covariance structure if needed
    if Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample
    j_ind = np.empty(N, dtype=np.int64)
    for j in xrange(J):
        j_ind[j_lim[j]:j_lim[j+1]] = j

    # Assign parameters
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
    else:
        sigma_a = SIGMA_A
    sigma_b = np.exp(rnd_data.randn(D)*SIGMA_BH)
    alpha_j = rnd_data.randn(J)*sigma_a
    beta_j = rnd_data.randn(J, D)*sigma_b

    # Regulate beta
    for j in xrange(J):
        beta_sum = np.sum(beta_j[j])
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta_j[j, index]
            beta_j[j, index] = rnd_data.randn()*sigma_b[index]
            beta_sum += beta_j[j, index]

    phi_true = np.append(np.log(sigma_a), np.log(sigma_b))

    # Determine suitable mu_x and sigma_x
    mu_x_j, sigma_x_j = calc_input_param_classification(
        alpha_j, beta_j, Sigma_x
    )

    # Simulate data
    # Different mu_x and sigma_x for every group
    X = np.empty((N, D))
    if Sigma_x is None:
        for j in xrange(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D)*sigma_x_j[j]
    else:
        cho_x = cholesky(Sigma_x)
        for j in xrange(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D).dot(sigma_x_j[j]*cho_x)
    y = np.empty(N)
    for n in xrange(N):
        y[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
    y = 1/(1+np.exp(-y))
    y_true = (0.5 < y).astype(int)
    y = (rnd_data.rand(N) < y).astype(int)

    return data(
        X, y, {'mu_x': mu_x_j, 'sigma_x': sigma_x_j, 'Sigma_x': Sigma_x},
        y_true, Nj, j_lim, j_ind,
        {'phi': phi_true, 'alpha': alpha_j, 'beta': beta_j}
    )
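
# In both classification variants above, the observed labels y are
# Bernoulli draws from the logistic probabilities, while y_true records
# the most probable label. A minimal, self-contained sketch of that final
# step, with eta standing in for the linear predictor alpha + X.dot(beta):

import numpy as np

rnd = np.random.RandomState(0)
eta = rnd.randn(8)                  # stand-in linear predictor
p = 1/(1 + np.exp(-eta))            # logistic link: P(y == 1)
y_true = (0.5 < p).astype(int)      # most probable label
y = (rnd.rand(8) < p).astype(int)   # Bernoulli(p) draw via uniform comparison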