def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    seed : {None, int}
        Seed given to ``np.random.RandomState``. Every random draw in this
        method comes from that single generator.

    Returns
    -------
    data
        Built as ``data(X, y, input_params, y_true, Nj, j_lim, j_ind,
        true_values)`` where ``input_params`` holds ``'sigma_x'`` and
        ``'Sigma_x'`` and ``true_values`` holds ``'phi'``, ``'alpha'``,
        ``'beta'`` and ``'sigma'``.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)

    # Randomise input covariance structure if needed.
    # The ndarray guard matters: comparing an array with a string is an
    # elementwise operation in current NumPy, and using its result in an
    # ``if`` raises ValueError.
    if not isinstance(Sigma_x, np.ndarray) and Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        # npg is a (low, high) pair: draw each group size from low..high
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample (group j repeated Nj[j] times)
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    if SIGMA is None:
        sigma = np.exp(rnd_data.randn()*SIGMA_H)
    else:
        sigma = SIGMA
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
    else:
        sigma_a = SIGMA_A
    if BETA is None:
        beta = rnd_data.randn(D)*SIGMA_B
    else:
        # Work on a private copy: the regulation loop below writes into
        # beta and must not modify the module-level constant.
        beta = np.array(BETA, dtype=float)

    # Regulate beta
    beta_sum = np.sum(beta)
    while np.abs(beta_sum) < B_ABS_MIN_SUM:
        # Replace one random element in beta
        index = rnd_data.randint(D)
        beta_sum -= beta[index]
        beta[index] = rnd_data.randn()*SIGMA_B
        beta_sum += beta[index]

    alpha_j = rnd_data.randn(J)*sigma_a

    phi_true = np.empty(self.dphi)
    phi_true[0] = np.log(sigma)
    phi_true[1] = np.log(sigma_a)
    phi_true[2:] = beta

    # Determine suitable sigma_x
    sigma_x = calc_input_param_lin_reg(beta, sigma, Sigma_x)

    # Simulate data
    if Sigma_x is None:
        X = rnd_data.randn(N, D)*sigma_x
    else:
        cho_x = cholesky(Sigma_x)
        X = rnd_data.randn(N, D).dot(sigma_x*cho_x)
    y_true = alpha_j[j_ind] + X.dot(beta)
    y = y_true + rnd_data.randn(N)*sigma

    return data(
        X, y, {'sigma_x':sigma_x, 'Sigma_x':Sigma_x}, y_true, Nj, j_lim,
        j_ind, {'phi':phi_true, 'alpha':alpha_j, 'beta':beta, 'sigma':sigma}
    )
def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    seed : {None, int}
        Seed given to ``np.random.RandomState``. Every random draw in this
        method comes from that single generator.

    Returns
    -------
    data
        Built as ``data(X, y, input_params, y_true, Nj, j_lim, j_ind,
        true_values)`` where ``input_params`` holds ``'mu_x'``,
        ``'sigma_x'`` and ``'Sigma_x'`` and ``true_values`` holds
        ``'phi'``, ``'alpha'`` and ``'beta'``. ``y`` is a 0/1 sample and
        ``y_true`` is 1 where the success probability exceeds 0.5.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)

    # Randomise input covariance structure if needed.
    # The ndarray guard matters: comparing an array with a string is an
    # elementwise operation in current NumPy, and using its result in an
    # ``if`` raises ValueError.
    if not isinstance(Sigma_x, np.ndarray) and Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        # npg is a (low, high) pair: draw each group size from low..high
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample (group j repeated Nj[j] times)
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_SA)
    else:
        sigma_a = SIGMA_A
    if MU_A is None:
        mu_a = rnd_data.randn()*SIGMA_MA
    else:
        mu_a = MU_A
    sigma_b = np.exp(rnd_data.randn(D)*SIGMA_SB)
    mu_b = rnd_data.randn(D)*SIGMA_MB
    alpha_j = mu_a + rnd_data.randn(J)*sigma_a
    beta_j = mu_b + rnd_data.randn(J, D)*sigma_b

    # Regulate beta: redraw single coefficients of a group until the
    # absolute value of the group's coefficient sum reaches B_ABS_MIN_SUM
    for j in range(J):
        beta_sum = np.sum(beta_j[j])
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta_j[j, index]
            beta_j[j, index] = mu_b[index] + rnd_data.randn()*sigma_b[index]
            beta_sum += beta_j[j, index]

    phi_true = np.empty(self.dphi)
    phi_true[0] = mu_a
    phi_true[1] = np.log(sigma_a)
    phi_true[2:2+D] = mu_b
    phi_true[2+D:] = np.log(sigma_b)

    # Determine suitable mu_x and sigma_x
    mu_x_j, sigma_x_j = calc_input_param_classification(
        alpha_j, beta_j, Sigma_x
    )

    # Simulate data
    # Different mu_x and sigma_x for every group
    X = np.empty((N, D))
    if Sigma_x is None:
        for j in range(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D)*sigma_x_j[j]
    else:
        cho_x = cholesky(Sigma_x)
        for j in range(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D).dot(sigma_x_j[j]*cho_x)
    # Linear predictor, one sample at a time with its own group's
    # intercept and coefficients
    y = np.empty(N)
    for n in range(N):
        y[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
    # Logistic transform to a success probability
    y = 1/(1+np.exp(-y))
    y_true = (0.5 < y).astype(int)
    y = (rnd_data.rand(N) < y).astype(int)

    return data(
        X, y, {'mu_x':mu_x_j, 'sigma_x':sigma_x_j, 'Sigma_x':Sigma_x},
        y_true, Nj, j_lim, j_ind,
        {'phi':phi_true, 'alpha':alpha_j, 'beta':beta_j}
    )
'his', 'him', 'hers', 'her', 'their', 'they', 'who', 'what', 'when', 'where',
'why', 'how',
))
# NOTE(review): the lines above close a collection of common English words
# whose opening lies outside this view; its name and use are not visible here.

# Retrieve encrypted message
# NOTE(review): `data` is a helper defined elsewhere; presumably it resolves
# the file name to a path -- confirm.
fpath = data("p59_cipher.txt")
encrypted = None
with open(fpath) as fh:
    # The file content is split on commas, giving a list of strings
    encrypted = fh.read().split(',')


# Encryption functions for testing
def encrypt_char(ecrypted_ord, key):
    """XOR the code points of two characters.

    Both arguments are single characters (each is passed to ``ord``). The
    result is returned as a decimal string.
    """
    return str(ord(ecrypted_ord) ^ ord(key))


def encrypt(msg, pw):
    """Encrypt `msg` character by character with the repeating key `pw`.

    Each character of `msg` is paired with the characters of `pw`, cycled
    for as long as needed, and every pair goes through ``encrypt_char``.
    Returns a list with one decimal string per character of `msg`.
    """
    return list(starmap(encrypt_char, zip(msg, cycle(pw))))
def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    seed : {None, int}
        Seed given to ``np.random.RandomState``. Every random draw in this
        method comes from that single generator.

    Returns
    -------
    data
        Built as ``data(X, y, input_params, y_true, Nj, j_lim, j_ind,
        true_values)`` where ``input_params`` holds ``"sigma_x"`` and
        ``"Sigma_x"`` and ``true_values`` holds ``"phi"``, ``"alpha"``,
        ``"beta"`` and ``"sigma"``. ``y`` is ``y_true`` plus Gaussian noise
        scaled by ``sigma``.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2 ** 31 - 1)

    # Randomise input covariance structure if needed.
    # The ndarray guard matters: comparing an array with a string is an
    # elementwise operation in current NumPy, and using its result in an
    # ``if`` raises ValueError.
    if not isinstance(Sigma_x, np.ndarray) and Sigma_x == "rand":
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, "__getitem__") and len(npg) == 2:
        # npg is a (low, high) pair: draw each group size from low..high
        Nj = rnd_data.randint(npg[0], npg[1] + 1, size=J)
    else:
        Nj = npg * np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample (group j repeated Nj[j] times)
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    if SIGMA is None:
        sigma = np.exp(rnd_data.randn() * SIGMA_H)
    else:
        sigma = SIGMA
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn() * SIGMA_SA)
    else:
        sigma_a = SIGMA_A
    if MU_A is None:
        mu_a = rnd_data.randn() * SIGMA_MA
    else:
        mu_a = MU_A
    sigma_b = np.exp(rnd_data.randn(D) * SIGMA_SB)
    mu_b = rnd_data.randn(D) * SIGMA_MB
    alpha_j = mu_a + rnd_data.randn(J) * sigma_a
    beta_j = mu_b + rnd_data.randn(J, D) * sigma_b

    # Regulate beta: redraw single coefficients of a group until the
    # absolute value of the group's coefficient sum reaches B_ABS_MIN_SUM
    for j in range(J):
        beta_sum = np.sum(beta_j[j])
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta_j[j, index]
            beta_j[j, index] = mu_b[index] + rnd_data.randn() * sigma_b[index]
            beta_sum += beta_j[j, index]

    phi_true = np.empty(self.dphi)
    phi_true[0] = np.log(sigma)
    phi_true[1] = mu_a
    phi_true[2] = np.log(sigma_a)
    phi_true[3 : 3 + D] = mu_b
    phi_true[3 + D :] = np.log(sigma_b)

    # Determine suitable sigma_x
    sigma_x_j = calc_input_param_lin_reg(beta_j, sigma, Sigma_x)

    # Simulate data
    # Different sigma_x for every group
    X = np.empty((N, D))
    if Sigma_x is None:
        for j in range(J):
            X[j_lim[j] : j_lim[j + 1], :] = (
                rnd_data.randn(Nj[j], D) * sigma_x_j[j]
            )
    else:
        cho_x = cholesky(Sigma_x)
        for j in range(J):
            X[j_lim[j] : j_lim[j + 1], :] = rnd_data.randn(Nj[j], D).dot(
                sigma_x_j[j] * cho_x
            )
    # Noise-free response, one sample at a time with its own group's
    # intercept and coefficients
    y_true = np.empty(N)
    for n in range(N):
        y_true[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
    y = y_true + rnd_data.randn(N) * sigma

    return data(
        X,
        y,
        {"sigma_x": sigma_x_j, "Sigma_x": Sigma_x},
        y_true,
        Nj,
        j_lim,
        j_ind,
        {"phi": phi_true, "alpha": alpha_j, "beta": beta_j, "sigma": sigma},
    )
def simulate_data(self, Sigma_x=None, seed=None):
    """Simulate data from the model.

    Returns models.common.data instance

    Parameters
    ----------
    Sigma_x : {None, 'rand', ndarray}
        The covariance structure of the explanatory variable. This is
        scaled to regulate the uncertainty. If not provided or None,
        identity matrix is used. Providing string 'rand' uses method
        common.rand_corr_vine to randomise one.

    seed : {None, int}
        Seed given to ``np.random.RandomState``. Every random draw in this
        method comes from that single generator.

    Returns
    -------
    data
        Built as ``data(X, y, input_params, y_true, Nj, j_lim, j_ind,
        true_values)`` where ``input_params`` holds ``'mu_x'``,
        ``'sigma_x'`` and ``'Sigma_x'`` and ``true_values`` holds
        ``'phi'``, ``'alpha'`` and ``'beta'``. ``y`` is a 0/1 sample and
        ``y_true`` is 1 where the success probability exceeds 0.5.

    """
    # Localise params
    J = self.J
    D = self.D
    npg = self.npg

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)
    # Draw random seed for input covariance for consistency in randomness
    # even if not needed
    seed_input_cov = rnd_data.randint(2**31-1)

    # Randomise input covariance structure if needed.
    # The ndarray guard matters: comparing an array with a string is an
    # elementwise operation in current NumPy, and using its result in an
    # ``if`` raises ValueError.
    if not isinstance(Sigma_x, np.ndarray) and Sigma_x == 'rand':
        Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

    # Parameters
    # Number of observations for each group
    if hasattr(npg, '__getitem__') and len(npg) == 2:
        # npg is a (low, high) pair: draw each group size from low..high
        Nj = rnd_data.randint(npg[0], npg[1]+1, size=J)
    else:
        Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample (group j repeated Nj[j] times)
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    if SIGMA_A is None:
        sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
    else:
        sigma_a = SIGMA_A
    sigma_b = np.exp(rnd_data.randn(D)*SIGMA_BH)
    alpha_j = rnd_data.randn(J)*sigma_a
    beta_j = rnd_data.randn(J, D)*sigma_b

    # Regulate beta: redraw single coefficients of a group until the
    # absolute value of the group's coefficient sum reaches B_ABS_MIN_SUM
    for j in range(J):
        beta_sum = np.sum(beta_j[j])
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta_j[j, index]
            beta_j[j, index] = rnd_data.randn()*sigma_b[index]
            beta_sum += beta_j[j, index]

    # phi = [log(sigma_a), log(sigma_b[0]), ..., log(sigma_b[D-1])]
    phi_true = np.append(np.log(sigma_a), np.log(sigma_b))

    # Determine suitable mu_x and sigma_x
    mu_x_j, sigma_x_j = calc_input_param_classification(
        alpha_j, beta_j, Sigma_x
    )

    # Simulate data
    # Different mu_x and sigma_x for every group
    X = np.empty((N, D))
    if Sigma_x is None:
        for j in range(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D)*sigma_x_j[j]
    else:
        cho_x = cholesky(Sigma_x)
        for j in range(J):
            X[j_lim[j]:j_lim[j+1], :] = \
                mu_x_j[j] + rnd_data.randn(Nj[j], D).dot(sigma_x_j[j]*cho_x)
    # Linear predictor, one sample at a time with its own group's
    # intercept and coefficients
    y = np.empty(N)
    for n in range(N):
        y[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
    # Logistic transform to a success probability
    y = 1/(1+np.exp(-y))
    y_true = (0.5 < y).astype(int)
    y = (rnd_data.rand(N) < y).astype(int)

    return data(
        X, y, {'mu_x':mu_x_j, 'sigma_x':sigma_x_j, 'Sigma_x':Sigma_x},
        y_true, Nj, j_lim, j_ind,
        {'phi':phi_true, 'alpha':alpha_j, 'beta':beta_j}
    )
def load_data(self, name, seed=None):
    """Load a data set from file and split it into train and test parts.

    The file ``data/<name>.txt`` is read with ``np.loadtxt``. Its last
    column is the integer target and all other columns are features. Rows
    are shuffled and split 90 % / 10 % into train and test sets. The train
    set is then truncated so that it divides evenly into ``self.J`` groups.

    Side effects: sets ``self.D`` to the number of features and
    ``self.npg`` to the number of training rows per group.

    Parameters
    ----------
    name : str
        Base name of the data file (without directory and extension).

    seed : {None, int}
        Seed for the ``np.random.RandomState`` used to draw ``beta``.
        NOTE(review): the train/test shuffle uses the global ``np.random``
        state, so it is not controlled by this seed -- confirm whether that
        is intended.

    Returns
    -------
    tuple
        ``(data_instance, X_test, y_test)`` where ``data_instance`` is built
        as ``data(X_train, y_train, {'mu_x', 'sigma_x'}, y_train, Nj, j_lim,
        j_ind, {'phi', 'beta'})``.

    """
    # load data
    data_full = np.loadtxt('data/%s.txt' % name)

    # We obtain the features and the targets
    n_col = data_full.shape[1]
    X = data_full[:, :n_col - 1]
    y = data_full[:, n_col - 1].astype(int)

    # We create the train and test sets with 90% and 10% of the data
    n_total = X.shape[0]
    permutation = np.random.choice(n_total, n_total, replace=False)
    # A slice bound must be an integer; np.round returns a float
    size_train = int(np.round(n_total * 0.9))

    index_train = permutation[:size_train]
    index_test = permutation[size_train:]

    X_train = X[index_train, :]
    y_train = y[index_train]
    X_test = X[index_test, :]
    y_test = y[index_test]

    # Localise params
    J = self.J
    self.D = X_train.shape[1]
    D = self.D
    self.npg = int(X_train.shape[0] / float(J))
    npg = self.npg

    # Drop the trailing rows that do not fill a complete group
    X_train = X_train[:(npg*J), :]
    y_train = y_train[:(npg*J)]

    # Set seed
    rnd_data = np.random.RandomState(seed=seed)

    # Parameters
    # Number of observations for each group (npg is a plain int here, so
    # every group has the same size)
    Nj = npg*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample (group j repeated Nj[j] times)
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    if BETA is None:
        beta = rnd_data.randn(D)*SIGMA_B
    else:
        # Work on a private copy: the regulation loop below writes into
        # beta and must not modify the module-level constant.
        beta = np.array(BETA, dtype=float)

    # Regulate beta
    beta_sum = np.sum(beta)
    while np.abs(beta_sum) < B_ABS_MIN_SUM:
        # Replace one random element in beta
        index = rnd_data.randint(D)
        beta_sum -= beta[index]
        beta[index] = rnd_data.randn()*SIGMA_B
        beta_sum += beta[index]

    phi_true = beta

    # Determine suitable mu_x and sigma_x
    mu_x_j, sigma_x_j = calc_input_param_classification(np.zeros(J), beta)

    return data(
        X_train, y_train, {'mu_x':mu_x_j, 'sigma_x':sigma_x_j}, y_train, Nj,
        j_lim, j_ind, {'phi':phi_true, 'beta':beta}
    ), X_test, y_test