Example #1
def estimate_C2(Z, C1_inv=None):
    D, M = Z.shape
    if C1_inv is None:
        # Plain centering across the M columns before forming Z Z^T
        Weight_mat = np.eye(M) - (1 / M) * np.ones((M, M))
        return cov2corr(Z @ Weight_mat @ Z.T)
    # GLS-style centering weighted by C1_inv; C1_inv.sum() sums all entries
    return cov2corr(Z @ (C1_inv - (C1_inv @ np.ones(
        (M, M)) @ C1_inv) / C1_inv.sum()) @ Z.T)
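A minimal usage sketch (not from the original project; it assumes numpy and statsmodels' cov2corr are imported as the snippet expects, and random data stands in for a standardized genotype matrix Z):

import numpy as np
from statsmodels.stats.moment_helpers import cov2corr

Z = np.random.randn(10, 500)           # 10 variables x 500 samples (illustrative)
C2 = estimate_C2(Z)                    # centering-based estimate
print(np.allclose(np.diag(C2), 1.0))   # a correlation matrix has a unit diagonal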
Example #2
    def compute_GRM(self):
        self.K = linear_kinship(self.G, verbose=True)
        self.K_corr = cov2corr(self.K)
        # Manual GRM from standardized genotypes, for comparison with linear_kinship
        Zm = (self.G - 2 * self.f) / np.sqrt(2 * self.f * (1 - self.f))
        self.K_man = (1 / self.num_snps) * Zm @ Zm.T
        self.K_man_corr = cov2corr(self.K_man)
Example #3
def test_cov2corr():
    cov_a = np.ones((3, 3)) + np.diag(np.arange(1, 4) ** 2 - 1)
    corr_a = np.array([[1, 1 / 2., 1 / 3.],
                       [1 / 2., 1, 1 / 2. / 3.],
                       [1 / 3., 1 / 2. / 3., 1]])

    corr = cov2corr(cov_a)
    assert_almost_equal(corr, corr_a, decimal=15)

    cov_mat = cov_a
    corr_mat = cov2corr(cov_mat)
    assert_(isinstance(corr_mat, np.ndarray))
    assert_equal(corr_mat, corr)

    cov_ma = np.ma.array(cov_a)
    corr_ma = cov2corr(cov_ma)
    assert_equal(corr_ma, corr)

    assert_(isinstance(corr_ma, np.ma.core.MaskedArray))

    cov_ma2 = np.ma.array(cov_a, mask=[[False, True, False],
                                       [True, False, False],
                                       [False, False, False]])

    corr_ma2 = cov2corr(cov_ma2)
    assert_(np.ma.allclose(corr_ma, corr, atol=1e-15))
    assert_equal(corr_ma2.mask, cov_ma2.mask)
Example #4
def test_cov2corr():
    cov_a = np.ones((3, 3)) + np.diag(np.arange(1, 4)**2 - 1)
    corr_a = np.array([[1, 1 / 2., 1 / 3.], [1 / 2., 1, 1 / 2. / 3.],
                       [1 / 3., 1 / 2. / 3., 1]])

    corr = cov2corr(cov_a)
    assert_almost_equal(corr, corr_a, decimal=15)

    cov_mat = np.matrix(cov_a)
    corr_mat = cov2corr(cov_mat)
    assert_(isinstance(corr_mat, np.matrixlib.defmatrix.matrix))
    assert_equal(corr_mat, corr)

    cov_ma = np.ma.array(cov_a)
    corr_ma = cov2corr(cov_ma)
    assert_equal(corr_ma, corr)

    assert_(isinstance(corr_ma, np.ma.core.MaskedArray))

    cov_ma2 = np.ma.array(cov_a,
                          mask=[[False, True, False], [True, False, False],
                                [False, False, False]])

    corr_ma2 = cov2corr(cov_ma2)
    assert_(np.ma.allclose(corr_ma, corr, atol=1e-15))
    assert_equal(corr_ma2.mask, cov_ma2.mask)
Example #5
    def step0_whole(self):
        self.C1_raw = None
        chromsome_unique, _ = np.unique(self.chromsome_list,
                                        return_inverse=True)
        centering = np.eye(self.num_sample) - (1 / self.num_sample) * np.ones(
            (self.num_sample, self.num_sample))
        now = datetime.now().time()
        self.C1_raw = self.G.T @ centering @ self.G
        self.C1 = cov2corr(self.C1_raw)
        self.C1_ = cov2corr(self.C1_raw +
                            np.eye(self.num_snps) * self.perturbation)
Example #6
    def shrinkage_est(self, res, target):
        '''
        Estimate the covariance matrix, using the shrinkage estimator

        Parameters
        ----------
        res : pandas.DataFrame or numpy.ndarray
            Residual matrix (observations in rows, variables in columns).
        target : pandas.DataFrame or numpy.ndarray
            Shrinkage target covariance matrix.

        Returns
        -------
        shrink_cov : numpy.ndarray
            Shrinkage covariance estimator.
        lamb : float
            Shrinkage parameter.
        '''

        #Make sure res and target are np arrays
        res = np.array(res)
        target = np.array(target)

        #Get the parameters of the residuals matrix
        n = res.shape[0]

        #Get the sample correlation and covariance matrix of the residuals
        covm = res.T.dot(res) / n
        corm = moment_helpers.cov2corr(covm)

        #Give the residuals equal standard deviation (=1)
        res_scaled = res / np.diag(covm)**0.5

        #Define the shrinking intensity lambda
        v = (1 / (n *
                  (n - 1))) * (((res_scaled**2).T.dot(res_scaled**2)) -
                               (1 / n) * (((res_scaled).T.dot(res_scaled))**2))
        np.fill_diagonal(v, 0)

        corapn = moment_helpers.cov2corr(target)
        d = (corm - corapn)**2

        lamb = v.sum() / d.sum()
        lamb = max(min(lamb, 1), 0)

        #Define the shrinkage estimator
        shrink_cov = lamb * target + (1 - lamb) * covm

        return shrink_cov, lamb
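A hedged usage sketch (the enclosing class is not shown in the snippet; if the def is dedented to module level, self can be passed as None, and the identity matrix serves as an illustrative shrinkage target; the moment_helpers import is the one the method relies on):

import numpy as np
from statsmodels.stats import moment_helpers

rng = np.random.default_rng(0)
res = rng.standard_normal((500, 4))        # 500 observations of 4 variables
target = np.eye(4)                         # shrink toward the identity
shrink_cov, lamb = shrinkage_est(None, res, target)
print(lamb)                                # shrinkage intensity, clipped to [0, 1]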
Example #7
    def create_dict(self):
        """Return a PMBEC correlation 2D dictionary."""
        # Silence stdout, since read_coefficients prints to stdout
        # TODO: Just fix pepdata.pmbec to not do this.
        with no_stdout():
            pmbec_coeffs = pmbec.read_coefficients()
            pmbec_coeffs_df = pd.DataFrame(pmbec_coeffs)

        # Use correlation rather than covariance
        pmbec_df = pd.DataFrame(cov2corr(pmbec_coeffs_df))
        pmbec_df.index = pmbec_coeffs_df.index
        pmbec_df.columns = pmbec_coeffs_df.columns

        # Include invalid letters, as Smith-Waterman expects substitution matrix values for them
        pmbec_dict = defaultdict(dict)
        pmbec_dict.update(pmbec_df.to_dict())
        valid_letters = set(pmbec_dict.keys())
        all_letters = valid_letters.union(INVALID_AMINO_ACID_LETTERS)
        for letter_i in all_letters:
            for letter_j in all_letters:
                if not (letter_i in valid_letters and letter_j in valid_letters):
                    # We don't need lower than 0, as Smith-Waterman sets negative scores to 0
                    pmbec_dict[letter_i][letter_j] = 0

        return pmbec_dict
Example #8
    def test_eigenvalue_calculation(self):

        # Test to make sure non-group and group versions agree
        # (in the case of no grouping)
        p = 100
        groups = np.arange(0, p, 1) + 1
        for rho in [0, 0.3, 0.5, 0.7]:
            V = np.zeros((p, p)) + rho
            for i in range(p):
                V[i, i] = 1
            expected_gamma = min(1, 2 * (1 - rho))
            gamma = knockoffs.calc_min_group_eigenvalue(
                Sigma=V,
                groups=groups,
            )
            np.testing.assert_almost_equal(
                gamma,
                expected_gamma,
                decimal=3,
                err_msg='calc_min_group_eigenvalue calculates wrong eigenvalue'
            )

        # Test non-equicorrelated version
        V = np.random.randn(p, p)
        V = np.dot(V.T, V) + 0.1 * np.eye(p)
        V = cov2corr(V)
        expected_gamma = min(1, 2 * np.linalg.eigh(V)[0].min())
        gamma = knockoffs.calc_min_group_eigenvalue(Sigma=V, groups=groups)
        np.testing.assert_almost_equal(
            gamma,
            expected_gamma,
            decimal=3,
            err_msg='calc_min_group_eigenvalue calculates wrong eigenvalue')
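For context (a quick numerical check, not part of the test suite): an equicorrelated matrix with off-diagonal rho has eigenvalues 1 - rho (multiplicity p - 1) and 1 + (p - 1) * rho, which is why the expected value above is min(1, 2 * (1 - rho)):

import numpy as np

p, rho = 100, 0.3
V = rho * np.ones((p, p)) + (1 - rho) * np.eye(p)
print(np.linalg.eigvalsh(V).min())  # ~0.7, i.e. 1 - rho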
Example #9
    def get_covar(self):
        '''
        Computes the sample variance-covariance and correlation matrices
        for the returns of the portfolio assets.

        returns:
            self, with self.covar (a dataframe of pairwise covariance
                coefficients between the portfolio assets) and self.corrs
                (a dataframe of pairwise correlation coefficients between
                the portfolio assets) attached.
        '''

        # compute covariances
        cov = np.cov(self.returns, rowvar=False)

        # assign to self as neat dataframe
        self.covar = pd.DataFrame(cov, columns=self.assets, index=self.assets)

        # compute correlation matrix from covar matrix
        corrs = cov2corr(self.covar)
        self.corrs = pd.DataFrame(corrs,
                                  columns=self.assets,
                                  index=self.assets)

        return self
Example #10
    def create_dict(self):
        """Return a PMBEC correlation 2D dictionary."""
        # Silence stdout, since read_coefficients prints to stdout
        # TODO: Just fix pepdata.pmbec to not do this.
        with no_stdout():
            pmbec_coeffs = pmbec.read_coefficients()
            pmbec_coeffs_df = pd.DataFrame(pmbec_coeffs)

        # Use correlation rather than covariance
        pmbec_df = pd.DataFrame(cov2corr(pmbec_coeffs_df))
        pmbec_df.index = pmbec_coeffs_df.index
        pmbec_df.columns = pmbec_coeffs_df.columns

        # Include invalid letters, as Smith-Waterman expects substitution matrix values for them
        pmbec_dict = defaultdict(dict)
        pmbec_dict.update(pmbec_df.to_dict())
        valid_letters = set(pmbec_dict.keys())
        all_letters = valid_letters.union(INVALID_AMINO_ACID_LETTERS)
        for letter_i in all_letters:
            for letter_j in all_letters:
                if not (letter_i in valid_letters
                        and letter_j in valid_letters):
                    # We don't need lower than 0, as Smith-Waterman sets negative scores to 0
                    pmbec_dict[letter_i][letter_j] = 0

        return pmbec_dict
Example #11
    def mle_estimate_error(self):
        '''MLE estimation from the POPRES analysis. Bins the data and can deal with errors.
        Param[0] always C; Param[1] always sigma'''
        # First create mle_object
        pw_dist, pw_IBD, pair_nr = self.give_pairwise_statistics()  # Create full pw. statistics
        pw_dist, pw_IBD, pair_nr = self.bin_pairwise_statistics(pw_dist, pw_IBD, pair_nr)
        bl_shr_density = uniform_density
        start_params = [1.0, 2.0]  # 1: D 2: Sigma

        # Create MLE_estimation object:
        ml_estimator = MLE_estim_error(bl_shr_density, start_params, pw_dist, pw_IBD, pair_nr, error_model=False)
        self.estimates = start_params  # Best guess without doing anything. Used as start for Bootstrap

        print("Doing fit...")
        results = ml_estimator.fit()  # method="nelder-mead"
        # results0 = ml_estimator.fit(method="BFGS")  # Do the actual fit. method="BFGS" possible
        self.estimates = results.params  # Save the parameter estimates

        fisher_info = np.asarray(ml_estimator.hessian(results.params))  # Get the Fisher info matrix
        cov_params = -np.linalg.inv(fisher_info)  # Asymptotic covariance of the estimates
        corr_mat = cov2corr(cov_params)
        print(corr_mat)
        stds = np.sqrt(np.diag(cov_params))
        self.stds = stds  # Save estimated STDs

        for i in range(len(results.params)):
            print("Parameter %i: %.6f" % (i, results.params[i]))
            print("CI: " + str(results.conf_int()[i]))
            # print("Estimated STD: %.6f" % stds[i])
        # print("D=%.5f" % self.from_C_to_D_e(results.params[0], results.params[1]))
        print(results.summary())  # Print the results.
Example #12
def estimate_C1(G, chromsome_list, Vmouse=None, Lambda=0.005, UseR=False):
    N, M = G.shape
    C1, C1_inv = None, None

    chromsome_unique, _ = np.unique(chromsome_list, return_inverse=True)
    counter = 1

    if Vmouse is None:
        Weight_mat = np.eye(N) - (1 / N) * np.ones((N, N))
    else:
        Weight_mat = find_inv_via_R(Vmouse) if UseR else np.linalg.inv(Vmouse)

    for chrom in chromsome_unique:
        subset = np.where(chromsome_list == chrom)[0]
        G_m = G[:, subset]
        V1_block = (G_m - 1).T @ Weight_mat @ (G_m - 1)
        C1_block = cov2corr(V1_block)

        C1_block = (1 - Lambda) * C1_block + Lambda * np.eye(C1_block.shape[0])
        C1_inv_block = find_inv_via_R(C1_block) if UseR else np.linalg.inv(
            C1_block)

        if counter == 1:
            C1 = C1_block
            C1_inv = C1_inv_block
            counter += 1
        else:
            C1 = block_diag(C1, C1_block)
            C1_inv = block_diag(C1_inv, C1_inv_block)
    return C1, C1_inv
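A small usage sketch under stated assumptions (random 0/1/2 genotypes stand in for G; it needs the numpy, scipy block_diag, and cov2corr imports the function relies on; UseR stays at its False default so find_inv_via_R is never called):

import numpy as np
from scipy.linalg import block_diag
from statsmodels.stats.moment_helpers import cov2corr

G = np.random.binomial(2, 0.3, size=(200, 50)).astype(float)  # 200 samples x 50 SNPs
chroms = np.repeat([1, 2], 25)                                # two chromosomes
C1, C1_inv = estimate_C1(G, chroms)
print(C1.shape)  # (50, 50), block-diagonal across chromosomes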
Example #13
    def check_colCorr(self):
        index = np.arange(0, self.num_trait, 25)
        self.colCorr = cov2corr(self.colCov)
        self.colCorr_subset = self.colCorr[index, :][:, index]
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax = sns.heatmap(self.colCorr_subset, center=0)
        plt.savefig("colCorr_heatmap.pdf")
Example #14
def generate(
    mu: pd.Series, Q: pd.DataFrame, nPaths: int = 100, repeat: int = 250, T: int = 6,
):
    total = np.zeros(len(mu))
    num_asset = len(mu)
    rho = cov2corr(Q)
    L = cholesky(rho, lower=True)
    dt = T
    confidence_level = 0.95
    variances = np.diag(Q)
    f = np.zeros(num_asset + nPaths + 1)
    f[:nPaths] = 1 / ((1 - confidence_level) * nPaths)
    f[nPaths : num_asset + nPaths] = 0
    f[-1] = 1
    A = np.array(
        [[0.0 for k in range(nPaths + num_asset + 1)] for j in range(2 * nPaths)]
    )

    A[:nPaths, :nPaths] = -1 * np.eye(nPaths)
    A[nPaths : (2 * nPaths), :nPaths] = -1 * np.eye(nPaths)
    A[nPaths : (2 * nPaths), -1] = -1

    Aeq = np.array([[0.0 for k in range(nPaths + num_asset + 1)] for j in range(1)])
    Aeq[0, nPaths : (nPaths + num_asset)] = 1

    beq = np.array([1])

    b = np.array([0.0 for k in range(3 * nPaths + num_asset + 1)])
    b[(2 * nPaths) : (3 * nPaths)] = 1000000000000
    b[(3 * nPaths) : num_asset + 3 * nPaths] = 0
    b[-1] = 1000000000000

    temp = -1 * np.eye(nPaths + num_asset + 1)

    exp_term_1 = ((mu.to_numpy() - 0.5 * variances) * dt).reshape(-1, 1)
    exp_term_2 = np.sqrt(variances * dt).reshape(-1, 1)

    for i in range(repeat):
        S = np.zeros((num_asset, 2, nPaths))
        S[:, 0, :] = 100

        xi = np.dot(L, np.random.randn(num_asset, nPaths))
        S[:, 1, :] = S[:, 0, :] * np.exp(exp_term_1 + exp_term_2 * xi)

        # returns_sample n_asset * nPeriod * nPaths
        returns_sample = S[:, -1, :] / S[:, 0, :] - 1

        for j in range((nPaths), (2 * nPaths)):
            A[j, nPaths : (nPaths + num_asset)] = -returns_sample[:, j - nPaths]

        A_ub = np.concatenate((A, temp), axis=0)

        res = linprog(
            c=f, A_ub=A_ub, b_ub=b, A_eq=Aeq, b_eq=beq, method="interior-point"
        )
        total = np.add(total, res.x[nPaths : nPaths + num_asset])

    return pd.Series(total / repeat, index=mu.index)
Example #15
def plot_black_litterman_results(ret_bl, covar_bl, market_prior, mu):
    rets_df = pd.DataFrame(
        [market_prior, ret_bl, pd.Series(mu)],
        index=["Prior", "Posterior", "Views"]).T
    rets_df.plot.bar(figsize=(12, 8), title='Black-Litterman Expected Returns')
    plot_heatmap(covar_bl, 'Black-Litterman Covariance', '', '')
    corr_bl = mh.cov2corr(covar_bl)
    corr_bl = pd.DataFrame(corr_bl,
                           index=covar_bl.index,
                           columns=covar_bl.columns)
    plot_heatmap(corr_bl, 'Black-Litterman Correlation', '', '')
Example #16
def generate_pheno(kinship, hsquared, N=300, P=15, rho=0.45):
    '''
    Generates phenotype data from a matrix-normal (MN) distribution.
    N = n_samples, P = n_traits, and rho is the autocorrelation parameter for B.
    The kinship matrix must be N x N.
    RETURNS ndarray of size (N x P)
    '''

    assert kinship.shape == (N, N)

    B = generate_B(P, rho)
    E = generate_E(P)

    U = matrix_normal.rvs(rowcov=kinship, colcov=hsquared * cov2corr(B))
    epsilon = matrix_normal.rvs(rowcov=np.eye(N),
                                colcov=(1 - hsquared) * cov2corr(E))

    return U + epsilon
Example #17
    def test_psd(self):

        # Test S matrix construction
        p = 100
        V = np.random.randn(p, p)
        V = np.dot(V.T, V) + 0.1 * np.eye(p)
        V = cov2corr(V)

        # Create random groups
        groups = np.random.randint(1, p, size=(p))
        groups = utilities.preprocess_groups(groups)
        S = knockoffs.equicorrelated_block_matrix(Sigma=V, groups=groups)

        # Check S properties
        self.check_S_properties(V, S, groups)
Example #18
def test_cov_diagonal(cov, nobs):
    r"""One sample hypothesis test that covariance matrix is diagonal matrix.

    The Null and alternative hypotheses are

    .. math::

       H0 &: \Sigma = diag(\sigma_i) \\
       H1 &: \Sigma \neq diag(\sigma_i)

    where :math:`\sigma_i` are the variances with unspecified values.

    Parameters
    ----------
    cov : array_like
        Covariance matrix of the data, estimated with denominator ``(N - 1)``,
        i.e. `ddof=1`.
    nobs : int
        number of observations used in the estimation of the covariance

    Returns
    -------
    res : instance of HolderTuple
        results with ``statistic, pvalue`` and other attributes like ``df``

    References
    ----------
    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.
    """
    cov = np.asarray(cov)
    k = cov.shape[0]
    R = cov2corr(cov)

    statistic = -(nobs - 1 - (2 * k + 5) / 6) * _logdet(R)
    df = k * (k - 1) / 2
    pvalue = stats.chi2.sf(statistic, df)
    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       distr="chi2",
                       null="diagonal"
                       )
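A quick worked call on illustrative data (it assumes the helpers the snippet uses — _logdet, HolderTuple, scipy.stats, cov2corr — are in scope; in statsmodels this function lives in statsmodels.stats.multivariate):

import numpy as np

rng = np.random.default_rng(123)
x = rng.standard_normal((200, 4))           # 4 independent columns
res = test_cov_diagonal(np.cov(x, rowvar=False), nobs=200)
print(res.statistic, res.pvalue)            # a large p-value is expected under H0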
Example #19
    def step1(self):
        now = datetime.now().time()
        print('******************************* Start to obtain C2:',
              " now the time is ", now)

        #u, s, _ = np.linalg.svd(self.V1)
        #L = u @ np.sqrt(np.diag(s)) @ u.T
        L = np.linalg.cholesky(self.V1)
        Z_update = self.Z @ np.linalg.inv(L).T
        centering = np.eye(self.num_snps) - (1 / self.num_snps) * np.ones(
            (self.num_snps, self.num_snps))
        self.C2_raw = Z_update @ centering @ Z_update.T
        self.C2 = cov2corr(self.C2_raw)
        now = datetime.now().time()
        print('******************************* Finished obtaining C2.',
              "now the time is ", now)
Example #20
def hub_spoke_corr_mat(D=50, groups=5, v=0.3, u=0.1):

    G = D // groups  # group size
    Theta = np.zeros([D, D])

    for g in range(groups):
        for i in range(G):
            Theta[g * G, g * G + i] = Theta[g * G + i, g * G] = 1

    Theta[np.diag_indices(D)] = 0
    Omega = Theta * v
    Omega[np.diag_indices(D)] = np.abs(np.min(
        np.linalg.eigvals(Omega))) + 0.1 + u
    Sigma = cov2corr(np.linalg.inv(Omega))
    Omega = np.linalg.inv(Sigma)

    return Omega, Sigma
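A brief sanity check (illustrative; it relies only on numpy plus the cov2corr import the function uses):

import numpy as np

Omega, Sigma = hub_spoke_corr_mat(D=20, groups=4)
print(np.allclose(Sigma, Sigma.T))           # the correlation matrix is symmetric
print(np.linalg.eigvalsh(Sigma).min() > 0)   # and positive definite by construction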
Example #21
def hub_spoke_corr_mat(D=50, groups=5, v=0.3, u=0.1):
    """Port of data generation code from Wasserman's Huge package."""
    from statsmodels.stats.moment_helpers import cov2corr

    G = D // groups  # group size
    Theta = np.zeros([D, D])

    for g in range(groups):
        for i in range(G):
            Theta[g * G, g * G + i] = Theta[g * G + i, g * G] = 1

    Theta[np.diag_indices(D)] = 0
    Omega = Theta * v
    Omega[np.diag_indices(D)] = np.abs(np.min(
        np.linalg.eigvals(Omega))) + 0.1 + u
    Sigma = cov2corr(np.linalg.inv(Omega))
    Omega = np.linalg.inv(Sigma)

    return Omega, Sigma
Example #22
    def mle_analysis_error(self):
        '''Does a maximum likelihood analysis with the full error model. Parameters can be found there.
        Param[0] always C; Param[1] always sigma'''
        ml_estimator = self.mle_object
        print("Doing fit...")
        results = ml_estimator.fit()  # method="nelder-mead"
        # results0 = ml_estimator.fit(method="BFGS")  # Do the actual fit. method="BFGS" possible
        self.estimates = results.params  # Save the parameter estimates (0: c 1: sigma ...)
        self.ci_s = results.conf_int()

        fisher_info = np.asarray(ml_estimator.hessian(results.params))  # Get the Fisher info matrix
        cov_params = -np.linalg.inv(fisher_info)  # Asymptotic covariance of the estimates
        corr_mat = cov2corr(cov_params)
        print(corr_mat)
        stds = np.sqrt(np.diag(cov_params))
        self.stds = stds  # Save estimated STDs

        for i in range(len(results.params)):
            print("Parameter %i: %.6f" % (i, results.params[i]))
            # print("Estimated STD: %.6f" % stds[i])
        print(results.summary())  # Print the results.
        self.mle_object = ml_estimator  # Remember the MLE estimation object.
Example #23
    def test_equicorrelated_construction(self):

        # Test S matrix construction
        p = 100
        groups = np.arange(0, p, 1) + 1
        V = np.random.randn(p, p)
        V = np.dot(V.T, V) + 0.1 * np.eye(p)
        V = cov2corr(V)

        # Expected construction
        expected_gamma = min(1, 2 * np.linalg.eigh(V)[0].min())
        expected_S = expected_gamma * np.eye(p)

        # Equicorrelated
        S = knockoffs.equicorrelated_block_matrix(Sigma=V, groups=groups)

        # Test to make sure the answer is expected
        np.testing.assert_almost_equal(
            S,
            expected_S,
            decimal=3,
            err_msg='calc_min_group_eigenvalue calculates wrong eigenvalue')
Example #24
def generate_rho_u(corr_type, D, num_spike=10, scale=0.1, num_block=10):

    if corr_type == "identity":
        return np.eye(D)

    elif corr_type == "spiked":
        spiked = 0
        for i in range(num_spike):
            v = np.random.rand(D, 1)
            v = v / np.linalg.norm(v, 2)
            spiked += (2**(-i + 1)) * (v @ v.T)
        return cov2corr(np.eye(D) + scale * spiked)

    elif corr_type == "geometric":
        rho_u = None
        block_size = D // num_block
        rho_u_block = toeplitz([(scale**i) for i in range(block_size)])
        for i in range(num_block):
            if i == 0:
                rho_u = rho_u_block
            else:
                rho_u = block_diag(rho_u, rho_u_block)
        return rho_u
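Usage sketch (it assumes the scipy.linalg toeplitz/block_diag and cov2corr imports the function needs):

import numpy as np
from scipy.linalg import toeplitz, block_diag
from statsmodels.stats.moment_helpers import cov2corr

rho_u = generate_rho_u("geometric", D=20, scale=0.5, num_block=4)
print(rho_u.shape)   # (20, 20): four 5x5 Toeplitz blocks on the diagonal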
Example #25
    def test_sdp_tolerance(self):

        # Get graph
        np.random.seed(110)
        Q = graphs.ErdosRenyi(p=50, tol=1e-1)
        V = cov2corr(utilities.chol2inv(Q))
        groups = np.concatenate([np.zeros(10) + j for j in range(5)]) + 1
        groups = groups.astype('int32')

        # Solve SDP
        for tol in [1e-3, 0.01, 0.02]:
            S = knockoffs.compute_S_matrix(Sigma=V,
                                           groups=groups,
                                           method='sdp',
                                           objective="pnorm",
                                           num_iter=10,
                                           tol=tol)
            G = np.hstack([np.vstack([V, V - S]), np.vstack([V - S, V])])
            mineig = np.linalg.eig(G)[0].min()
            self.assertTrue(
                tol - mineig > -1 * tol / 10,
                f'sdp solver fails to control minimum eigenvalues: tol is {tol}, val is {mineig}'
            )
            self.check_S_properties(V, S, groups)
Example #26
    mu = mu.T
    
    #n_stock x n_stock asset covariance matrix
    Q = np.dot(np.dot(V.T,F),V) + D
    
    return [mu, Q]

[mu, Q] = fama_french(returns, factors)


## ********************************************
## stochastic MVO
## ********************************************
    
num_asset = len(mu)
rho = cov2corr(Q)

nPaths = 400
L = cholesky(rho, lower=True)
T = 12
N = 3
dt = T/N

# Because this is a minimization problem, a negative coefficient means reward
reward_per_dollar_surplus = -2
# Because this is a minimization problem, a positive coefficient means punishment
punishment_per_dollar_shortfall = 1

# risk aversion coefficient
risk_weight_coefficient = 1000
Example #27
def covariance2errors(covariances):
    corr, std = zip(*[cov2corr(c, True) for c in covariances])
    return np.asarray(corr), np.asarray(std)
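For illustration: cov2corr(c, True) returns a (corr, std) pair, which the zip unpacks across the list (assuming the cov2corr import the function relies on):

import numpy as np
from statsmodels.stats.moment_helpers import cov2corr

covs = [np.array([[4.0, 1.0], [1.0, 9.0]])] * 3
corr, std = covariance2errors(covs)
print(corr.shape, std.shape)   # (3, 2, 2) and (3, 2)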
Example #28
def test_short_panel():
    #this checks that some basic statistical properties are satisfied by the
    #results, not verified results against other packages
    #Note: the ranking of robust bse is different if within=True
    #I added within keyword to PanelSample to be able to use old example
    #if within is False, then there is no within group variation in exog.
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
                      corr_args=([1], [1., -0.9],), seed=377769, within=False)
    #print 'seed', dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true

    #test dgp

    dgp_cov_e = np.array(
              [[ 1.    ,  0.9   ,  0.81  ,  0.729 ,  0.6561],
               [ 0.9   ,  1.    ,  0.9   ,  0.81  ,  0.729 ],
               [ 0.81  ,  0.9   ,  1.    ,  0.9   ,  0.81  ],
               [ 0.729 ,  0.81  ,  0.9   ,  1.    ,  0.9   ],
               [ 0.6561,  0.729 ,  0.81  ,  0.9   ,  1.    ]])

    npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)

    cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
    corr_noise = cov2corr(cov_noise)
    npt.assert_almost_equal(corr_noise, dgp.cov, 1)

    #estimate panel model
    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)


    #whitened residual should be uncorrelated
    corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_wresid, np.eye(5), 0.1)

    #residual should have same correlation as dgp
    corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_resid, dgp.cov, 0.1)

    assert_almost_equal(res2.resid.std(),1, decimal=0)

    y_pred = np.dot(mod2.exog, res2.params)
    assert_almost_equal(res2.fittedvalues, y_pred, 13)


    #compare with OLS

    res2_ols = mod2._fit_ols()
    npt.assert_(mod2.res_pooled is res2_ols)

    res2_ols = mod2.res_pooled  #TODO: BUG: requires call to _fit_ols

    #fitting once is the same as OLS
    #note: I need to create new instance, otherwise it continues fitting
    mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res1 = mod1.fit_iterative(1)

    assert_almost_equal(res1.params, res2_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)

    res_ols = OLS(y, dgp.exog).fit()
    assert_almost_equal(res1.params, res_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res_ols.bse, decimal=13)


    #compare with old version
    mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res_old = mod_old.fit()

    assert_almost_equal(res2.params, res_old.params, decimal=13)
    assert_almost_equal(res2.bse, res_old.bse, decimal=13)


    mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res5 = mod5.fit_iterative(5)

    #make sure it's different
    #npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))

    cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
    clubse = se_cov(cov_clu)
    pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
    bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
    bser_mean = np.mean(bser, axis=0)

    #cov_cluster close to robust and PanelGLS
    #is up to 24% larger than mean of bser
    #npt.assert_array_less(0, clubse / bser_mean - 1)
    npt.assert_array_less(clubse / bser_mean - 1, 0.25)
    #cov_nw_panel close to robust and PanelGLS
    npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
    #OLS underestimates bse, robust at least 60% larger
    npt.assert_array_less(0.6, bser_mean / res_ols.bse  - 1)

    #cov_hac_panel with uniform_kernel is the same as cov_cluster for balanced
    #panel with full length kernel
    #I fixed default correction to be equal
    cov_uni = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction='c')
    assert_almost_equal(cov_uni, cov_clu, decimal=13)

    #without correction
    cov_clu2 = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int),
                              use_correction=False)
    cov_uni2 = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction=False)
    assert_almost_equal(cov_uni2, cov_clu2, decimal=13)

    cov_white = sw.cov_white_simple(mod2.res_pooled)
    cov_pnw0 = sw.cov_nw_panel(mod2.res_pooled, 0, mod2.group.groupidx,
                              use_correction='hac')
    assert_almost_equal(cov_pnw0, cov_white, decimal=13)
Example #29
import numpy as np
from statsmodels.stats import moment_helpers as mh

sigma = np.array([[4, 2, -3], [2, 9, 0], [-3, 0, 9]])
x1 = np.array([[1], [0], [0]])
x2 = np.array([[0], [-1], [0]])
x3 = np.array([[1], [1], [0]])
x3_hat = x3 / (x3**2).sum()**0.5

print('Original Matrix')
print(sigma)

# Quadratic forms x' Sigma x: use matrix multiplication, not elementwise *
x1_var = x1.T @ sigma @ x1
x2_var = x2.T @ sigma @ x2
x3_var = x3_hat.T @ sigma @ x3_hat

print('Variance (x1) = \n', x1_var)
print('Variance (x2) = \n', x2_var)
print('Variance (x3) =', x3_var.sum())

print()
print('Cov 2 Corr')
print(mh.cov2corr(sigma))
Example #30
    def cov(self, k_vars=None):
        # Rebuild the covariance matrix from the correlation matrix and the
        # standard deviations (corr2cov is the inverse of cov2corr; cov2corr's
        # second argument is return_std, so it cannot take self.sigma).
        return corr2cov(self.corr(k_vars=k_vars), self.sigma)
Example #31
# (fragment) Fill the upper triangle of the kernel weight matrix row by row,
# then symmetrize and add the unit diagonal. The enclosing loop header is
# missing from the snippet; range(len_t) is a plausible reconstruction.
for t_ix in range(len_t):
    kernel_mtx[t_ix, np.arange(t_ix + 1, len_t)] = kernel_vec[np.arange(
        1, len_t - t_ix)]
kernel_mtx = kernel_mtx + np.transpose(kernel_mtx) + np.eye(len_t)

var_E = []
for i in range(n):
    var_i = []
    for t in range(len_t):
        var_s = []
        for s in range(len_t):
            var_s.append(kernel_mtx[t, s] *
                         np.outer(Y[:, i, :][s, :], Y[:, i, :][s, :]))
        var_i.append(np.sum(np.array(var_s), 0) / np.sum(kernel_mtx[t, :]))
    var_E.append(var_i)

alpha_1 = alpha_max(cov2corr(C0_hat))
alpha_0 = alpha_1 * 0.1
alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 50)
A0_hat_list = [cov.graph_lasso(cov2corr(C0_hat), alpha)[1] for alpha in alphas]

alpha_1 = alpha_max(np.array(cov2corr(C0)))
alpha_0 = alpha_1 * 0.1
alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 50)
A0_oracle_list = [
    cov.graph_lasso(np.array(cov2corr(C0)), alpha)[1] for alpha in alphas
]

alpha_1 = alpha_max(cov2corr(C0_X_hat))
alpha_0 = alpha_1 * 0.1
alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 50)
A0_X_list = [cov.graph_lasso(cov2corr(C0_X_hat), alpha)[1] for alpha in alphas]
Example #32
def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
                return_all=False):

    '''
    Find the nearest covariance matrix that is positive (semi-) definite

    This leaves the diagonal, i.e. the variance, unchanged

    Parameters
    ----------
    cov : ndarray, (k,k)
        initial covariance matrix
    method : string
        if "clipped", then the faster but less accurate ``corr_clipped`` is used.
        if "nearest", then ``corr_nearest`` is used
    threshold : float
        clipping threshold for smallest eigen value, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its doc string
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation


    Notes
    -----
    This converts the covariance matrix to a correlation matrix. Then, finds
    the nearest correlation matrix that is positive semidefinite and converts
    it back to a covariance matrix using the initial standard deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped

    '''

    from statsmodels.stats.moment_helpers import cov2corr, corr2cov
    cov_, std_ = cov2corr(cov, return_std=True)
    if method == 'clipped':
        corr_ = corr_clipped(cov_, threshold=threshold)
    elif method == 'nearest':
        corr_ = corr_nearest(cov_, threshold=threshold, n_fact=n_fact)

    cov_ = corr2cov(corr_, std_)

    if return_all:
        return cov_, corr_, std_
    else:
        return cov_
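A short usage example: the indefinite "covariance" below has a small negative eigenvalue, which the repair lifts to roughly the threshold while leaving the diagonal unchanged (corr_clipped/corr_nearest come from statsmodels.stats.correlation_tools, where this function is defined):

import numpy as np
from statsmodels.stats.correlation_tools import cov_nearest

c = np.array([[1.0, 0.95, 0.7],
              [0.95, 1.0, 0.95],
              [0.7, 0.95, 1.0]])
print(np.linalg.eigvalsh(c).min())         # negative: not a valid covariance
c_fixed = cov_nearest(c, method='nearest')
print(np.linalg.eigvalsh(c_fixed).min())   # ~0, within the threshold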