Example #1
 def simulate_data(self, Sigma_x=None, seed=None):
     """Simulate data from the model.
     
     Returns models.common.data instance
     
     Parameters
     ----------
     Sigma_x : {None, 'rand', ndarray}
         The covariance structure of the explanatory variable. This is
         scaled to regulate the uncertainty. If not provided or None, the
         identity matrix is used. The string 'rand' randomises one using
         the method common.rand_corr_vine.
     
     """
     # Localise params
     J = self.J
     D = self.D
     npg = self.npg
     
     # Set seed
     rnd_data = np.random.RandomState(seed=seed)
     # Draw random seed for input covariance for consistency in randomness
     # even if not needed
     seed_input_cov = rnd_data.randint(2**31-1)
     
     # Randomise input covariance structure if needed
     if isinstance(Sigma_x, str) and Sigma_x == 'rand':
         Sigma_x = rand_corr_vine(D, seed=seed_input_cov)
     
     # Parameters
     # Number of observations for each group
     if hasattr(npg, '__getitem__') and len(npg) == 2:
         Nj = rnd_data.randint(npg[0],npg[1]+1, size=J)
     else:
         Nj = npg*np.ones(J, dtype=np.int64)
     # Total number of observations
     N = np.sum(Nj)
     # Observation index limits for J groups
     j_lim = np.concatenate(([0], np.cumsum(Nj)))
     # Group indices for each sample
     j_ind = np.empty(N, dtype=np.int64)
     for j in xrange(J):
         j_ind[j_lim[j]:j_lim[j+1]] = j
     
     # Assign parameters
     if SIGMA is None:
         sigma = np.exp(rnd_data.randn()*SIGMA_H)
     else:
         sigma = SIGMA
     if SIGMA_A is None:
         sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
     else:
         sigma_a = SIGMA_A
     if BETA is None:
         beta = rnd_data.randn(D)*SIGMA_B
     else:
         beta = BETA
     
     # Regulate beta
     beta_sum = np.sum(beta)
     while np.abs(beta_sum) < B_ABS_MIN_SUM:
         # Replace one random element in beta
         index = rnd_data.randint(D)
         beta_sum -= beta[index]
         beta[index] = rnd_data.randn()*SIGMA_B
         beta_sum += beta[index]
     
     alpha_j = rnd_data.randn(J)*sigma_a
     phi_true = np.empty(self.dphi)
     phi_true[0] = np.log(sigma)
     phi_true[1] = np.log(sigma_a)
     phi_true[2:] = beta
     
     # Determine suitable sigma_x
     sigma_x = calc_input_param_lin_reg(beta, sigma, Sigma_x)
     
     # Simulate data
     if Sigma_x is None:
         X = rnd_data.randn(N,D)*sigma_x
     else:
         cho_x = cholesky(Sigma_x)
         X = rnd_data.randn(N,D).dot(sigma_x*cho_x)
     y_true = alpha_j[j_ind] + X.dot(beta)
     y = y_true + rnd_data.randn(N)*sigma
     
     return data(
         X, y, {'sigma_x':sigma_x, 'Sigma_x':Sigma_x}, y_true, Nj, j_lim, 
         j_ind, {'phi':phi_true, 'alpha':alpha_j, 'beta':beta, 
         'sigma':sigma}
     )
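
A hedged usage sketch for simulate_data above: the enclosing model class is not shown, so the name LinRegModel and its constructor arguments are assumptions for illustration; the method itself only reads the J, D, npg and dphi attributes from self.

# Hypothetical usage (LinRegModel is an assumed stand-in for the
# enclosing class, which must define J, D, npg and dphi).
model = LinRegModel(J=10, D=3, npg=(20, 40))    # 10 groups, 20-40 obs each
d = model.simulate_data(Sigma_x='rand', seed=0)
print(d.X.shape)    # (N, 3), N being the total number of observations
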
Example #2
 def simulate_data(self, Sigma_x=None, seed=None):
     """Simulate data from the model.
     
     Returns models.common.data instance
     
     Parameters
     ----------
     Sigma_x : {None, 'rand', ndarray}
         The covariance structure of the explanatory variable. This is
         scaled to regulate the uncertainty. If not provided or None, the
         identity matrix is used. The string 'rand' randomises one using
         the method common.rand_corr_vine.
     
     """
     # Localise params
     J = self.J
     D = self.D
     npg = self.npg
     
     # Set seed
     rnd_data = np.random.RandomState(seed=seed)
     # Draw random seed for input covariance for consistency in randomness
     # even if not needed
     seed_input_cov = rnd_data.randint(2**31-1)
     
     # Randomise input covariance structure if needed
     if isinstance(Sigma_x, str) and Sigma_x == 'rand':
         Sigma_x = rand_corr_vine(D, seed=seed_input_cov)
     
     # Parameters
     # Number of observations for each group
     if hasattr(npg, '__getitem__') and len(npg) == 2:
         Nj = rnd_data.randint(npg[0],npg[1]+1, size=J)
     else:
         Nj = npg*np.ones(J, dtype=np.int64)
     # Total number of observations
     N = np.sum(Nj)
     # Observation index limits for J groups
     j_lim = np.concatenate(([0], np.cumsum(Nj)))
     # Group indices for each sample
     j_ind = np.empty(N, dtype=np.int64)
     for j in xrange(J):
         j_ind[j_lim[j]:j_lim[j+1]] = j
     
     # Assign parameters
     if SIGMA_A is None:
         sigma_a = np.exp(rnd_data.randn()*SIGMA_SA)
     else:
         sigma_a = SIGMA_A
     if MU_A is None:
         mu_a = rnd_data.randn()*SIGMA_MA
     else:
         mu_a = MU_A
     sigma_b = np.exp(rnd_data.randn(D)*SIGMA_SB)
     mu_b = rnd_data.randn(D)*SIGMA_MB
     alpha_j = mu_a + rnd_data.randn(J)*sigma_a
     beta_j = mu_b + rnd_data.randn(J,D)*sigma_b
     
     # Regulate beta
     for j in xrange(J):
         beta_sum = np.sum(beta_j[j])
         while np.abs(beta_sum) < B_ABS_MIN_SUM:
             # Replace one random element in beta
             index = rnd_data.randint(D)
             beta_sum -= beta_j[j,index]
             beta_j[j,index] = mu_b[index] + rnd_data.randn()*sigma_b[index]
             beta_sum += beta_j[j,index]
     
     phi_true = np.empty(self.dphi)
     phi_true[0] = mu_a
     phi_true[1] = np.log(sigma_a)
     phi_true[2:2+D] = mu_b
     phi_true[2+D:] = np.log(sigma_b)
     
     # Determine suitable mu_x and sigma_x
     mu_x_j, sigma_x_j = calc_input_param_classification(
         alpha_j, beta_j, Sigma_x
     )
     
     # Simulate data
     # Different mu_x and sigma_x for every group
     X = np.empty((N,D))
     if Sigma_x is None:
         for j in xrange(J):
             X[j_lim[j]:j_lim[j+1],:] = \
                 mu_x_j[j] + rnd_data.randn(Nj[j],D)*sigma_x_j[j]
     else:
         cho_x = cholesky(Sigma_x)
         for j in xrange(J):
             X[j_lim[j]:j_lim[j+1],:] = \
                 mu_x_j[j] + rnd_data.randn(Nj[j],D).dot(sigma_x_j[j]*cho_x)
     y = np.empty(N)
     for n in xrange(N):
         y[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
     y = 1/(1+np.exp(-y))
     y_true = (0.5 < y).astype(int)
     y = (rnd_data.rand(N) < y).astype(int)
     
     return data(
         X, y, {'mu_x':mu_x_j, 'sigma_x':sigma_x_j, 'Sigma_x':Sigma_x}, 
         y_true, Nj, j_lim, j_ind, {'phi':phi_true, 'alpha':alpha_j, 
         'beta':beta_j}
     )
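
The labelling step at the end of this example maps the linear predictor through a logistic link and then draws Bernoulli observations; y_true records the noiseless class. A minimal self-contained sketch of that pattern, with made-up numbers:

# Sketch of the sigmoid + Bernoulli labelling used above.
import numpy as np

rnd = np.random.RandomState(0)
eta = np.array([-2.0, -0.1, 0.3, 1.5])    # linear predictor alpha + X.dot(beta)
p = 1/(1 + np.exp(-eta))                  # logistic link: P(y = 1)
y_true = (0.5 < p).astype(int)            # noiseless labels: [0, 0, 1, 1]
y = (rnd.rand(len(p)) < p).astype(int)    # observed Bernoulli draws
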
Example #3
    'his',
    'him',
    'hers',
    'her',
    'their',
    'they',
    'who',
    'what',
    'when',
    'where',
    'why',
    'how',
))

# Retrieve encrypted message
fpath = data("p59_cipher.txt")

with open(fpath) as fh:
    encrypted = fh.read().split(',')


# Encryption functions for testing
from itertools import cycle, starmap


def encrypt_char(char, key):
    # XOR one plaintext character with one key character and return the
    # resulting code as a decimal string.
    return str(ord(char) ^ ord(key))


def encrypt(msg, pw):
    # XOR the whole message against the password, cycled to the message length.
    return list(starmap(encrypt_char, zip(msg, cycle(pw))))
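
Since XOR is its own inverse, applying the same cycled key to the numeric codes recovers the plaintext. A quick round-trip check of the functions above (the message and the key 'abc' are made up for illustration):

codes = encrypt("hello", "abc")    # ['9', '7', '15', '13', '13']
decrypted = ''.join(
    chr(int(c) ^ ord(k)) for c, k in zip(codes, cycle("abc"))
)
assert decrypted == "hello"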

Example #4
    def simulate_data(self, Sigma_x=None, seed=None):
        """Simulate data from the model.
        
        Returns models.common.data instance
        
        Parameters
        ----------
        Sigma_x : {None, 'rand', ndarray}
            The covariance structure of the explanatory variable. This is
            scaled to regulate the uncertainty. If not provided or None, the
            identity matrix is used. The string 'rand' randomises one using
            the method common.rand_corr_vine.
        
        """
        # Localise params
        J = self.J
        D = self.D
        npg = self.npg

        # Set seed
        rnd_data = np.random.RandomState(seed=seed)
        # Draw random seed for input covariance for consistency in randomness
        # even if not needed
        seed_input_cov = rnd_data.randint(2 ** 31 - 1)

        # Randomise input covariance structure if needed
        if Sigma_x == "rand":
            Sigma_x = rand_corr_vine(D, seed=seed_input_cov)

        # Parameters
        # Number of observations for each group
        if hasattr(npg, "__getitem__") and len(npg) == 2:
            Nj = rnd_data.randint(npg[0], npg[1] + 1, size=J)
        else:
            Nj = npg * np.ones(J, dtype=np.int64)
        # Total number of observations
        N = np.sum(Nj)
        # Observation index limits for J groups
        j_lim = np.concatenate(([0], np.cumsum(Nj)))
        # Group indices for each sample
        j_ind = np.empty(N, dtype=np.int64)
        for j in xrange(J):
            j_ind[j_lim[j] : j_lim[j + 1]] = j

        # Assign parameters
        if SIGMA is None:
            sigma = np.exp(rnd_data.randn() * SIGMA_H)
        else:
            sigma = SIGMA
        if SIGMA_A is None:
            sigma_a = np.exp(rnd_data.randn() * SIGMA_SA)
        else:
            sigma_a = SIGMA_A
        if MU_A is None:
            mu_a = rnd_data.randn() * SIGMA_MA
        else:
            mu_a = MU_A
        sigma_b = np.exp(rnd_data.randn(D) * SIGMA_SB)
        mu_b = rnd_data.randn(D) * SIGMA_MB
        alpha_j = mu_a + rnd_data.randn(J) * sigma_a
        beta_j = mu_b + rnd_data.randn(J, D) * sigma_b

        # Regulate beta
        for j in xrange(J):
            beta_sum = np.sum(beta_j[j])
            while np.abs(beta_sum) < B_ABS_MIN_SUM:
                # Replace one random element in beta
                index = rnd_data.randint(D)
                beta_sum -= beta_j[j, index]
                beta_j[j, index] = mu_b[index] + rnd_data.randn() * sigma_b[index]
                beta_sum += beta_j[j, index]

        phi_true = np.empty(self.dphi)
        phi_true[0] = np.log(sigma)
        phi_true[1] = mu_a
        phi_true[2] = np.log(sigma_a)
        phi_true[3 : 3 + D] = mu_b
        phi_true[3 + D :] = np.log(sigma_b)

        # Determine suitable sigma_x
        sigma_x_j = calc_input_param_lin_reg(beta_j, sigma, Sigma_x)

        # Simulate data
        # Different sigma_x for every group
        X = np.empty((N, D))
        if Sigma_x is None:
            for j in xrange(J):
                X[j_lim[j] : j_lim[j + 1], :] = rnd_data.randn(Nj[j], D) * sigma_x_j[j]
        else:
            cho_x = cholesky(Sigma_x)
            for j in xrange(J):
                X[j_lim[j] : j_lim[j + 1], :] = rnd_data.randn(Nj[j], D).dot(sigma_x_j[j] * cho_x)
        y_true = np.empty(N)
        for n in xrange(N):
            y_true[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
        y = y_true + rnd_data.randn(N) * sigma

        return data(
            X,
            y,
            {"sigma_x": sigma_x_j, "Sigma_x": Sigma_x},
            y_true,
            Nj,
            j_lim,
            j_ind,
            {"phi": phi_true, "alpha": alpha_j, "beta": beta_j, "sigma": sigma},
        )
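
The correlated-input draw above computes rnd_data.randn(Nj[j], D).dot(sigma_x_j[j] * cho_x), which is correct when cholesky returns an upper-triangular factor U with U.T.dot(U) == Sigma_x, as scipy.linalg.cholesky does by default. A minimal sketch of the pattern, with a made-up 2x2 covariance:

import numpy as np
from scipy.linalg import cholesky

Sigma = np.array([[1.0, 0.6],
                  [0.6, 1.0]])
s = 0.5
U = cholesky(Sigma)                       # upper triangular by default
Z = np.random.RandomState(1).randn(200000, 2)
X = Z.dot(s * U)                          # rows ~ N(0, s**2 * Sigma)
print(np.cov(X, rowvar=False))            # approx s**2 * Sigma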
Example #5
 def simulate_data(self, Sigma_x=None, seed=None):
     """Simulate data from the model.
     
     Returns models.common.data instance
     
     Parameters
     ----------
     Sigma_x : {None, 'rand', ndarray}
         The covariance structure of the explanatory variable. This is
         scaled to regulate the uncertainty. If not provided or None, the
         identity matrix is used. The string 'rand' randomises one using
         the method common.rand_corr_vine.
     
     """
     # Localise params
     J = self.J
     D = self.D
     npg = self.npg
     
     # Set seed
     rnd_data = np.random.RandomState(seed=seed)
     # Draw random seed for input covariance for consistency in randomness
     # even if not needed
     seed_input_cov = rnd_data.randint(2**31-1)
     
     # Randomise input covariance structure if needed
     if isinstance(Sigma_x, str) and Sigma_x == 'rand':
         Sigma_x = rand_corr_vine(D, seed=seed_input_cov)
     
     # Parameters
     # Number of observations for each group
     if hasattr(npg, '__getitem__') and len(npg) == 2:
         Nj = rnd_data.randint(npg[0],npg[1]+1, size=J)
     else:
         Nj = npg*np.ones(J, dtype=np.int64)
     # Total number of observations
     N = np.sum(Nj)
     # Observation index limits for J groups
     j_lim = np.concatenate(([0], np.cumsum(Nj)))
     # Group indices for each sample
     j_ind = np.empty(N, dtype=np.int64)
     for j in xrange(J):
         j_ind[j_lim[j]:j_lim[j+1]] = j
     
     # Assign parameters
     if SIGMA_A is None:
         sigma_a = np.exp(rnd_data.randn()*SIGMA_AH)
     else:
         sigma_a = SIGMA_A
     sigma_b = np.exp(rnd_data.randn(D)*SIGMA_BH)
     alpha_j = rnd_data.randn(J)*sigma_a
     beta_j = rnd_data.randn(J,D)*sigma_b
     
     # Regulate beta
     for j in xrange(J):
         beta_sum = np.sum(beta_j[j])
         while np.abs(beta_sum) < B_ABS_MIN_SUM:
             # Replace one random element in beta
             index = rnd_data.randint(D)
             beta_sum -= beta_j[j,index]
             beta_j[j,index] = rnd_data.randn()*sigma_b[index]
             beta_sum += beta_j[j,index]
     
     phi_true = np.append(np.log(sigma_a), np.log(sigma_b))
     
     # Determine suitable mu_x and sigma_x
     mu_x_j, sigma_x_j = calc_input_param_classification(
         alpha_j, beta_j, Sigma_x
     )
     
     # Simulate data
     # Different mu_x and sigma_x for every group
     X = np.empty((N,D))
     if Sigma_x is None:
         for j in xrange(J):
             X[j_lim[j]:j_lim[j+1],:] = \
                 mu_x_j[j] + rnd_data.randn(Nj[j],D)*sigma_x_j[j]
     else:
         cho_x = cholesky(Sigma_x)
         for j in xrange(J):
             X[j_lim[j]:j_lim[j+1],:] = \
                 mu_x_j[j] + rnd_data.randn(Nj[j],D).dot(sigma_x_j[j]*cho_x)
     y = np.empty(N)
     for n in xrange(N):
         y[n] = alpha_j[j_ind[n]] + X[n].dot(beta_j[j_ind[n]])
     y = 1/(1+np.exp(-y))
     y_true = (0.5 < y).astype(int)
     y = (rnd_data.rand(N) < y).astype(int)
     
     return data(
         X, y, {'mu_x':mu_x_j, 'sigma_x':sigma_x_j, 'Sigma_x':Sigma_x}, 
         y_true, Nj, j_lim, j_ind, {'phi':phi_true, 'alpha':alpha_j, 
         'beta':beta_j}
     )
Example #6
    def load_data(self, name, seed=None):
        """Load data for the model from 'data/<name>.txt'.
        
        Returns a models.common.data instance for the training set,
        together with the held-out test arrays X_test and y_test.
        
        """
        
        # load data
        data_full = np.loadtxt('data/%s.txt' % name)

        # Obtain the features and the integer targets
        X = data_full[:, :-1]
        y = data_full[:, -1].astype(int)

        # Create the train and test sets with 90% and 10% of the data.
        # np.round returns a float, so cast to int before slicing.
        permutation = np.random.choice(
            range(X.shape[0]), X.shape[0], replace=False)
        size_train = int(np.round(X.shape[0] * 0.9))
        index_train = permutation[:size_train]
        index_test = permutation[size_train:]

        X_train = X[index_train, :]
        y_train = y[index_train]
        X_test = X[index_test, :]
        y_test = y[index_test]
        
        # Localise params
        J = self.J
        self.D = X_train.shape[1]
        D = self.D
        self.npg = int(X_train.shape[0] / float(J))
        npg = self.npg
        
        X_train = X_train[:(self.npg*self.J), :]
        y_train = y_train[:(self.npg*self.J)]
        
        # Set seed
        rnd_data = np.random.RandomState(seed=seed)
        
        # Parameters
        # Number of observations for each group
        if hasattr(npg, '__getitem__') and len(npg) == 2:
            Nj = rnd_data.randint(npg[0],npg[1]+1, size=J)
        else:
            Nj = npg*np.ones(J, dtype=np.int64)
        # Total number of observations
        N = np.sum(Nj)
        # Observation index limits for J groups
        j_lim = np.concatenate(([0], np.cumsum(Nj)))
        # Group indices for each sample
        j_ind = np.empty(N, dtype=np.int64)
        for j in xrange(J):
            j_ind[j_lim[j]:j_lim[j+1]] = j
        
        if BETA is None:
            beta = rnd_data.randn(D)*SIGMA_B
        else:
            beta = BETA
        
        # Regulate beta
        beta_sum = np.sum(beta)
        while np.abs(beta_sum) < B_ABS_MIN_SUM:
            # Replace one random element in beta
            index = rnd_data.randint(D)
            beta_sum -= beta[index]
            beta[index] = rnd_data.randn()*SIGMA_B
            beta_sum += beta[index]
        
        phi_true = beta
        # Determine suitable mu_x and sigma_x
        mu_x_j, sigma_x_j = calc_input_param_classification(np.zeros(J), beta)
        
        return data(
            X_train, y_train, {'mu_x':mu_x_j, 'sigma_x':sigma_x_j}, y_train, Nj, j_lim, 
            j_ind, {'phi':phi_true, 'beta':beta}
        ), X_test, y_test
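
The permutation-based 90/10 split used in load_data, in isolation (a sketch with toy arrays; the int cast matters because np.round returns a float, which cannot be used as a slice index):

import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.arange(10)
perm = np.random.RandomState(0).permutation(X.shape[0])
size_train = int(np.round(X.shape[0] * 0.9))    # 9 of the 10 rows for training
X_train, y_train = X[perm[:size_train]], y[perm[:size_train]]
X_test, y_test = X[perm[size_train:]], y[perm[size_train:]]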