Example 1
    def compute_patterns(self, megdata=None, output='patterns'):
        """
        Computes spatial patterns from filter weights.

        Required for visualization.
        """

        vis_dict = {self.handle: self.train_handle, self.rate: 1}
        spatial = self.sess.run(self.demix.W, feed_dict=vis_dict)
        self.filters = np.squeeze(
            self.sess.run(self.tconv1.filters, feed_dict=vis_dict))
        self.patterns = spatial

        if 'patterns' in output:
            data = self.sess.run(self.X, feed_dict=vis_dict)
            data = data.transpose([0, 2, 1])
            data = data.reshape([-1, data.shape[-1]])
            self.dcov, _ = ledoit_wolf(data)
            self.patterns = np.dot(self.dcov, self.patterns)
        if 'full' in output:  # note: reuses `data`, so 'patterns' must also be in `output`
            lat_cov, _ = ledoit_wolf(np.dot(data, spatial))
            self.lat_prec = np.linalg.inv(lat_cov)
            self.patterns = np.dot(self.patterns, self.lat_prec)
        self.out_weights, self.out_biases = self.sess.run(
            [self.fin_fc.w, self.fin_fc.b], feed_dict=vis_dict)
        self.out_weights = np.reshape(self.out_weights,
                                      [self.specs['n_ls'], -1, self.n_classes])
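The key step above is the weights-to-patterns transformation: spatial filter weights become interpretable activation patterns when multiplied by the (shrunk) data covariance. A minimal, self-contained sketch of just that step, with made-up array shapes:

import numpy as np
from sklearn.covariance import ledoit_wolf

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 32))   # samples x channels (illustrative layout)
W = rng.normal(size=(32, 4))      # spatial filter weights, channels x components

dcov, _ = ledoit_wolf(X)          # shrunk channel covariance, 32 x 32
patterns = dcov @ W               # activation patterns, channels x components
print(patterns.shape)             # (32, 4)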
Example 2
def getSigma(datas, method='Simple'):
    asset = datas.columns
    datas['n'] = np.arange(datas.shape[0])
    datas['group'] = pd.qcut(datas.n, 4, labels=False)
    weights = np.arange(1, datas.shape[1]) / 10  # [0.1, 0.2, 0.3, 0.4]; note: never used below

    if method == 'Simple':
        sigma_1 = datas.loc[datas.group == 0, asset].cov()
        sigma_2 = datas.loc[datas.group == 1, asset].cov()
        sigma_3 = datas.loc[datas.group == 2, asset].cov()
        sigma_4 = datas.loc[datas.group == 3, asset].cov()
        sigma = 0.1 * sigma_1 + sigma_2 * 0.2 + sigma_3 * 0.3 + sigma_4 * 0.4
    elif method == 'Ledoit':
        sigma_1, a = ledoit_wolf(datas.loc[datas.group == 0, asset])
        sigma_2, a = ledoit_wolf(datas.loc[datas.group == 1, asset])
        sigma_3, a = ledoit_wolf(datas.loc[datas.group == 2, asset])
        sigma_4, a = ledoit_wolf(datas.loc[datas.group == 3, asset])
        sigma = 0.1 * sigma_1 + sigma_2 * 0.2 + sigma_3 * 0.3 + sigma_4 * 0.4
        sigma = pd.DataFrame(sigma)
    elif method == 'DW':
        datas[datas > 0] = 0  # keep only non-positive returns (downside risk)
        datas['n'] = np.arange(datas.shape[0])
        datas['group'] = pd.qcut(datas.n, 4, labels=False)
        sigma_1 = datas.loc[datas.group == 0, asset].cov()
        sigma_2 = datas.loc[datas.group == 1, asset].cov()
        sigma_3 = datas.loc[datas.group == 2, asset].cov()
        sigma_4 = datas.loc[datas.group == 3, asset].cov()
        sigma = 0.1 * sigma_1 + sigma_2 * 0.2 + sigma_3 * 0.3 + sigma_4 * 0.4
    else:
        raise ValueError('Unknown method: {}'.format(method))
    return sigma
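A minimal usage sketch for getSigma, assuming the function above and ledoit_wolf (from sklearn.covariance) are in scope; the random returns DataFrame is made up for illustration. pd.qcut on the running index splits the sample into four chronological quarters, and later quarters receive larger weights in the blended covariance:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
returns = pd.DataFrame(rng.normal(scale=0.01, size=(400, 3)),
                       columns=['A', 'B', 'C'])

sigma = getSigma(returns.copy(), method='Ledoit')  # per-quarter Ledoit-Wolf blend
print(sigma.shape)  # (3, 3)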
Example 3
def calculate_Sigma(X, method_name='SCE'):
    if "pandas" in str(type(X)):
        X = X.values
    sigma = None
    if method_name == 'SCE':
        sigma = np.cov(X, rowvar=False)
    elif method_name == 'LWE':
        sigma = sk_cov.ledoit_wolf(X)[0]
    elif method_name == 'rie' or method_name == 'RIE':

        if X.shape[0] <= X.shape[1]:
            # sigma = sk_cov.ledoit_wolf(X)[0]
            sigma = np.cov(X, rowvar=False)
        else:
            sigma = rmt.optimalShrinkage(X, return_covariance=True)

    elif method_name == 'clipped':
        if X.shape[0] <= X.shape[1]:
            sigma = sk_cov.ledoit_wolf(X)[0]
            # sigma = rmt.optimalShrinkage(X, return_covariance=True)
        else:
            sigma = rmt.clipped(X, return_covariance=True)
    else:
        raise ValueError(
            'Unknown method name: {}'.format(method_name))

    return sigma
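A usage sketch for the two branches that need only scikit-learn; 'rie' and 'clipped' rely on an external random-matrix-theory module imported as rmt, which is not shown here. sk_cov is assumed to be sklearn.covariance:

import numpy as np
import sklearn.covariance as sk_cov

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 20))             # samples x assets

sigma_sample = calculate_Sigma(X, 'SCE')   # plain sample covariance
sigma_lw = calculate_Sigma(X, 'LWE')       # Ledoit-Wolf shrinkage
print(sigma_sample.shape, sigma_lw.shape)  # (20, 20) (20, 20)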
Example 4
def bhdist(mu1, mu2, mat1, mat2, cov_est=1):
    # Bhattacharyya distance, assuming normal distributions
    diff_mn_mat = np.matrix(mu1 - mu2).T
    if (cov_est == 0):
        cov_mat1 = np.cov(np.matrix(mat1).T)
        cov_mat2 = np.cov(np.matrix(mat2).T)
    elif (cov_est == 1):
        cov_mat1 = ledoit_wolf(mat1)[0]
        cov_mat2 = ledoit_wolf(mat2)[0]
    elif (cov_est == 2):
        cov_mat1 = diag_covmat(mat1)
        cov_mat2 = diag_covmat(mat2)
    cov_mat_mn = (cov_mat1 + cov_mat2) / 2
    icov_mat_mn = np.linalg.pinv(cov_mat_mn)  # (pseudo-)inverse of the pooled covariance
    term1 = np.dot(np.dot(diff_mn_mat.T, icov_mat_mn), diff_mn_mat) / 8
    (sign1, logdet1) = np.linalg.slogdet(cov_mat1)
    (sign2, logdet2) = np.linalg.slogdet(cov_mat2)
    (sign_mn, logdet_mn) = np.linalg.slogdet(cov_mat_mn)
    ln_det_mat1 = logdet1
    ln_det_mat2 = logdet2
    ln_det_mat_mn = logdet_mn
    term2 = (ln_det_mat_mn /
             2) - (ln_det_mat1 + ln_det_mat2
                   ) / 4  #np.log(det_mat_mn/np.sqrt(det_mat1*det_mat2))/2
    result = term1 + term2
    return result[0, 0]
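A quick check of bhdist on two simulated Gaussian samples (samples x features), using the Ledoit-Wolf branch (cov_est=1); the data are made up:

import numpy as np
from sklearn.covariance import ledoit_wolf

rng = np.random.default_rng(0)
mat1 = rng.normal(loc=0.0, size=(200, 5))
mat2 = rng.normal(loc=0.5, size=(200, 5))

d = bhdist(mat1.mean(axis=0), mat2.mean(axis=0), mat1, mat2, cov_est=1)
print(d)  # positive scalar that grows as the two samples separate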
Example 5
    def fit(self, X):
        sc = StandardScaler()  # standardize features
        X_sc = sc.fit_transform(X)
        s = ledoit_wolf(X_sc)[0]
        # rescale back to the original feature scales
        s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
        self.covariance_ = s
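Since the snippet is a bare method, here is a self-contained version for experimentation (the class name is hypothetical). Standardizing first and rescaling afterwards means the Ledoit-Wolf shrinkage acts on the correlation structure rather than on the raw variances:

import numpy as np
from sklearn.covariance import ledoit_wolf
from sklearn.preprocessing import StandardScaler

class ScaledLedoitWolf:
    def fit(self, X):
        sc = StandardScaler()
        X_sc = sc.fit_transform(X)        # shrink in standardized space
        s = ledoit_wolf(X_sc)[0]
        # undo the scaling: cov(DX) = D cov(X) D for diagonal D
        self.covariance_ = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
        return self

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 4)) * np.array([1.0, 2.0, 0.5, 3.0])
print(ScaledLedoitWolf().fit(X).covariance_.shape)  # (4, 4)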
Example 6
    def _get_omega(self, returns):
        """
        Get robust covariance matrix for use in Newton solver.
        
        Parameters
        ----------
        returns: numpy array of return data
        
        Returns
        ----------
        omega: array of shape nxn where n is equal to the number of
               securities involved
        """
        corr_returns = returns[-self.corr_window:, :]
        cov_returns = returns[-self.cov_window:, :]

        if self.cov_est == 'oas':
            omega = OAS().fit(corr_returns).covariance_ * 10**4
        elif self.cov_est == 'empirical':
            omega = EmpiricalCovariance().fit(corr_returns).covariance_ * 10**4
        else:
            corr = np.corrcoef(corr_returns, rowvar=False)
            cov_diag = np.diag(np.sqrt(np.var(cov_returns, axis=0)))
            omega = cov_diag @ corr @ cov_diag
            if self.lw_shrink is None:
                lw = ledoit_wolf(corr_returns)[1]
                omega = shrunk_covariance(omega, shrinkage=lw) * 10**4
            else:
                omega = shrunk_covariance(omega,
                                          shrinkage=self.lw_shrink) * 10**4
        return omega
Example 7
def _cov(X, shrinkage=None):
    """Estimate covariance matrix (using optional shrinkage).
    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Input data.
    shrinkage : string or float, optional
        Shrinkage parameter, possible values:
          - None or 'empirical': no shrinkage (default).
          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.
          - float between 0 and 1: fixed shrinkage parameter.
    Returns
    -------
    s : array, shape (n_features, n_features)
        Estimated covariance matrix.
    """
    shrinkage = "empirical" if shrinkage is None else shrinkage
    if isinstance(shrinkage, str):
        if shrinkage == 'auto':
            sc = StandardScaler()  # standardize features
            X = sc.fit_transform(X)
            s = ledoit_wolf(X)[0]
            # rescale
            s = sc.scale_[:, np.newaxis] * s * sc.scale_[np.newaxis, :]
        elif shrinkage == 'empirical':
            s = empirical_covariance(X)
        else:
            raise ValueError('unknown shrinkage parameter')
    elif isinstance(shrinkage, float) or isinstance(shrinkage, int):
        if shrinkage < 0 or shrinkage > 1:
            raise ValueError('shrinkage parameter must be between 0 and 1')
        s = shrunk_covariance(empirical_covariance(X), shrinkage)
    else:
        raise TypeError('shrinkage must be of string or int type')
    return s
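A usage sketch exercising the three shrinkage modes of _cov, assuming the imports its body needs (numpy as np, plus StandardScaler, ledoit_wolf, empirical_covariance and shrunk_covariance from scikit-learn) are in scope:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 10))

s_emp = _cov(X)            # None -> 'empirical': plain sample covariance
s_auto = _cov(X, 'auto')   # Ledoit-Wolf shrinkage in standardized space
s_fixed = _cov(X, 0.25)    # fixed blend toward the scaled identity
print(s_emp.shape, s_auto.shape, s_fixed.shape)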
Example 8
File: utils.py Project: yyht/SSD
def get_scores_one_cluster(ftrain, ftest, food, shrunkcov=False):
    if shrunkcov:
        print("Using ledoit-wolf covariance estimator.")
        cov = lambda x: ledoit_wolf(x)[0]
    else:
        cov = lambda x: np.cov(x.T, bias=True)

    # TODO: simplify these equations
    dtest = np.sum(
        (ftest - np.mean(ftrain, axis=0, keepdims=True))
        * (
            np.linalg.pinv(cov(ftrain)).dot(
                (ftest - np.mean(ftrain, axis=0, keepdims=True)).T
            )
        ).T,
        axis=-1,
    )

    dood = np.sum(
        (food - np.mean(ftrain, axis=0, keepdims=True))
        * (
            np.linalg.pinv(cov(ftrain)).dot(
                (food - np.mean(ftrain, axis=0, keepdims=True)).T
            )
        ).T,
        axis=-1,
    )

    return dtest, dood
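A small smoke test, assuming the function above plus numpy and ledoit_wolf are in scope. The simulated out-of-distribution features sit away from the training mean, so their Mahalanobis distances come out larger:

import numpy as np
from sklearn.covariance import ledoit_wolf

rng = np.random.default_rng(0)
ftrain = rng.normal(size=(500, 16))
ftest = rng.normal(size=(100, 16))
food = rng.normal(loc=2.0, size=(100, 16))   # shifted -> "out of distribution"

dtest, dood = get_scores_one_cluster(ftrain, ftest, food, shrunkcov=True)
print(dtest.mean() < dood.mean())  # True: OOD distances are larger on average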
Example 9
def var_info(mat1, mat2):
    n = mat1.shape[1]
    mat0 = np.hstack([mat1,mat2])
    cov_mat0 = ledoit_wolf(mat0)[0]
    cov_mat1 = ledoit_wolf(mat1)[0]
    cov_mat2 = ledoit_wolf(mat2)[0]
    (sign0, logdet0) = np.linalg.slogdet(cov_mat0)
    (sign1, logdet1) = np.linalg.slogdet(cov_mat1)
    (sign2, logdet2) = np.linalg.slogdet(cov_mat2)
    ln_det_mat0 = logdet0
    ln_det_mat1 = logdet1
    ln_det_mat2 = logdet2
    # n*log(2*e*pi) instead of log((2*e*pi)**n) to avoid overflow for large n
    H_mat1 = 0.5 * n * np.log(2 * np.e * np.pi) + 0.5 * ln_det_mat1
    H_mat2 = 0.5 * n * np.log(2 * np.e * np.pi) + 0.5 * ln_det_mat2
    MI = 0.5 * (ln_det_mat1 + ln_det_mat2 - ln_det_mat0)
    return H_mat1 + H_mat2 - 2 * MI
Example 10
def sk_ledoit_wolf(X):
    """Estimate inverse covariance via scikit-learn ledoit_wolf function.
    """
    print("Ledoit-Wolf (sklearn)")
    lw_cov_, _ = ledoit_wolf(X)
    lw_prec_ = np.linalg.inv(lw_cov_)
    return lw_cov_, lw_prec_
Example 11
def rpw_ledoit_wolf(prices,
                    initial_weights=None,
                    risk_weights=None,
                    risk_parity_method='ccd',
                    maximum_iterations=100,
                    tolerance=1E-8,
                    min_assets_number=2,
                    max_assets_number=6):
    """
    Calculates the equal risk contribution / risk parity weights given a
    DataFrame of returns.
    Wraps mean_var_weights with ledoit_wolf covariance calculation method

    Args:
        * prices (DataFrame): Prices for multiple securities.
        * initial_weights (list): Starting asset weights [default inverse vol].
        * risk_weights (list): Risk target weights [default equal weight].
        * risk_parity_method (str): Risk parity estimation method.
            Currently supported:
                - ccd (cyclical coordinate descent)[default]
        * maximum_iterations (int): Maximum iterations in iterative solutions.
        * tolerance (float): Tolerance level in iterative solutions.
        * min_assets_number: minimum number of assets in the portfolio at time t
        * max_assets_number: maximum number of assets in the portfolio at time t

    Returns:
        Series {col_name: weight}

    """
    r = prices.to_returns().dropna()
    covar = ledoit_wolf(r)[0]
    return covar  # note: only the shrunk covariance is returned; the weighting step is not shown here
Example 12
    def likelihood(self, y_obs, y_sim):
        # print("DEBUG: SynLiklihood.likelihood().")
        if not isinstance(y_obs, list):
            raise TypeError('Observed data is not of allowed types')

        if not isinstance(y_sim, list):
            raise TypeError('Simulated data is not of allowed types')

        # Extract summary statistics from the observed data
        if (self.stat_obs is None or y_obs != self.data_set):
            self.stat_obs = self.statistics_calc.statistics(y_obs)
            self.data_set = y_obs

        # Extract summary statistics from the simulated data
        stat_sim = self.statistics_calc.statistics(y_sim)

        # Compute the mean, robust precision matrix and determinant of precision matrix
        # print("DEBUG: meansim computation.")
        mean_sim = np.mean(stat_sim, 0)
        # print("DEBUG: robust_precision_sim computation.")
        lw_cov_, _ = ledoit_wolf(stat_sim)
        robust_precision_sim = np.linalg.inv(lw_cov_)
        # print("DEBUG: robust_precision_sim_det computation..")
        robust_precision_sim_det = np.linalg.det(robust_precision_sim)
        # print("DEBUG: combining.")
        tmp1 = robust_precision_sim * np.array(
            self.stat_obs.reshape(-1, 1) - mean_sim.reshape(-1, 1)).T
        tmp2 = np.exp(
            np.sum(
                -0.5 *
                np.sum(np.array(self.stat_obs - mean_sim) * np.array(tmp1).T,
                       axis=1)))
        tmp3 = pow(np.sqrt((1 / (2 * np.pi)) * robust_precision_sim_det),
                   self.stat_obs.shape[0])
        return tmp2 * tmp3
Example 13
def sk_ledoit_wolf(X):
    '''Estimate inverse covariance via scikit-learn ledoit_wolf function.
    '''
    print('Ledoit-Wolf (sklearn)')
    lw_cov_, _ = ledoit_wolf(X)
    lw_prec_ = np.linalg.inv(lw_cov_)
    return lw_cov_, lw_prec_
Example 14
def estimate(df,
             mean_est='equal_weights',
             cov_est='equal_weights',
             alpha=1e-10):
    """
        Estimate mean and covariance given historical data

        Parameters
        ----------
        df: pd.DataFrame (n.sample, n.feature)
            historical data

        mean_est: str
            method to estimate mean
            selected from {'equal_weights', 'exponential_weights', 'linear-weights'}

        cov_est: str
            method to estimate covariance
            selected from {'equal_weights', 'exponential_weights', 'ledoit_wolf', 'oas'}

        alpha: float, required if exponential_weights is selected
            [0, 1]; larger alpha puts more weight on recent observations
            (exponential_weights -> equal_weights as alpha -> 0)

        Returns
        -------
        mean, cov: np.array
            estimated mean (n.feature) and covariance (n.feature * n.feature)
    """

    if not isinstance(df, pd.DataFrame):
        raise TypeError('Historical data must be data frame.')

    if not isinstance(alpha, float):
        raise TypeError('Parameter alpha must be float.')

    if mean_est == 'equal_weights':
        mean = df.mean().values
    elif mean_est == 'exponential_weights':
        mean = df.ewm(alpha=alpha).mean().iloc[-1].values
    elif mean_est == 'linear-weights':
        weights = np.array(range(1, df.shape[0] + 1))
        mean = df.values.T @ weights / sum(weights)
    else:
        raise ValueError('Method does not exist.')

    if cov_est == 'equal_weights':
        cov = df.cov().values
    elif cov_est == 'exponential_weights':
        cov = df.ewm(alpha=alpha).cov().iloc[-df.shape[1]:].values
    elif cov_est == 'ledoit_wolf':
        cov, _ = ledoit_wolf(df)
    elif cov_est == 'oas':
        cov, _ = oas(df)
    else:
        raise ValueError('Method does not exist.')

    return mean, cov
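A usage sketch with synthetic daily returns (the DataFrame and column names are illustrative), assuming estimate and its imports (pandas, numpy, ledoit_wolf, oas) are in scope:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(scale=0.01, size=(250, 3)),
                  columns=['a', 'b', 'c'])

mean, cov = estimate(df, mean_est='equal_weights', cov_est='ledoit_wolf')
print(mean.shape, cov.shape)  # (3,) (3, 3)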
Example 15
def _construct_mcca_gevp(Xs, regs=None, as_lists=False):
    r"""
    Constructs the matrices for the MCCA generalized eigenvector problem
    :math:`LHS v = \lambda RHS v`.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        The list of data matrices

    regs : None | float | 'lw' | 'oas' or list of them, shape (n_views)
        As described in ``mvlearn.mcca.mcca.MCCA``

    as_lists : bool
        If True, returns LHS and RHS as lists of composing blocks instead
        of their composition into full matrices.

    Returns
    -------
    LHS, RHS : numpy.ndarray, (sum_b n_features_b, sum_b n_features_b)
        Left and right hand side matrices for the GEVP
    """
    Xs, n_views, n_samples, n_features = check_Xs(
        Xs, multiview=True, return_dimensions=True
    )
    regs = _check_regs(regs, n_views)

    LHS = [[None for b in range(n_views)] for b in range(n_views)]
    RHS = [None for b in range(n_views)]

    # cross covariance matrices
    for (a, b) in combinations(range(n_views), 2):
        LHS[a][b] = Xs[a].T @ Xs[b]
        LHS[b][a] = LHS[a][b].T

    # view covariance matrices, possibly regularized
    for b in range(n_views):
        if regs[b] is None:
            RHS[b] = Xs[b].T @ Xs[b]
        elif isinstance(regs[b], Number):
            RHS[b] = (1 - regs[b]) * Xs[b].T @ Xs[b] + \
                regs[b] * np.eye(n_features[b])
        elif isinstance(regs[b], str):
            if regs[b] == "lw":
                RHS[b] = ledoit_wolf(Xs[b])[0]
            elif regs[b] == "oas":
                RHS[b] = oas(Xs[b])[0]
            # put back on the scale of X^T X, as opposed to the
            # proper covariance estimate returned by these functions
            RHS[b] *= n_samples

        LHS[b][b] = RHS[b]

    if not as_lists:
        LHS = np.block(LHS)
        RHS = block_diag(*RHS)

    return LHS, RHS
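For context: once LHS and RHS are assembled, the MCCA directions come from a symmetric-definite generalized eigenvalue problem, which scipy solves directly. A toy sketch with stand-in matrices (not the mvlearn internals):

import numpy as np
from scipy.linalg import eigh

rng = np.random.default_rng(0)
A = rng.normal(size=(6, 6))
LHS = A + A.T + 12 * np.eye(6)   # symmetric stand-in for the block matrix
B = rng.normal(size=(6, 6))
RHS = B @ B.T + 6 * np.eye(6)    # symmetric positive definite

evals, evecs = eigh(LHS, RHS)    # solves LHS v = lambda RHS v
print(evals[-1])                 # largest generalized eigenvalue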
Example 16
    def ledoit_wolf(self):
        """
        Calculate the Ledoit-Wolf shrinkage estimate.

        :return: shrunk sample covariance matrix
        :rtype: np.ndarray
        """
        X = np.nan_to_num(self.X.values)
        shrunk_cov, self.delta = covariance.ledoit_wolf(X)
        return self.format_and_annualise(shrunk_cov)
Example 17
    def ledoit_wolf(self):
        """
        Calculate the Ledoit-Wolf shrinkage estimate.

        :return: shrunk sample covariance matrix
        :rtype: np.ndarray
        """
        X = np.nan_to_num(self.X.values)
        shrunk_cov, self.delta = covariance.ledoit_wolf(X)
        return self.format_and_annualise(shrunk_cov)
Example 18
    def calculate_covariance(self, t):

        covariance = self.pert_kernel.covariance(t, self.theta[t - 1],
                                                 self.delta[t - 1],
                                                 self.epsilon[t],
                                                 self.wt[t - 1])

        if np.linalg.det(covariance) < 1.E-15:
            covariance = ledoit_wolf(self.theta[t - 1])[0]

        return covariance
Example 19
def prewhiten(betas, residuals):
    cov_ledoit, _ = ledoit_wolf(residuals)
    Uc, Dc, Vhc = svd(cov_ledoit, full_matrices=False)
    cov_ledoit_sqrt = np.dot(Uc * np.sqrt(Dc), Vhc)
    prewhiten_ok = True
    try:
        betas_prewhitened = np.dot(betas, np.linalg.inv(cov_ledoit_sqrt))
    except np.linalg.LinAlgError:
        betas_prewhitened = betas  #if there are problems use original
        prewhiten_ok = False
    return (betas_prewhitened, prewhiten_ok)
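A quick usage sketch with made-up shapes (betas: conditions x voxels, residuals: timepoints x voxels), assuming prewhiten plus numpy, scipy's svd and ledoit_wolf are in scope:

import numpy as np
from scipy.linalg import svd
from sklearn.covariance import ledoit_wolf

rng = np.random.default_rng(0)
residuals = rng.normal(size=(120, 8))  # timepoints x voxels
betas = rng.normal(size=(10, 8))       # conditions x voxels

betas_w, ok = prewhiten(betas, residuals)
print(betas_w.shape, ok)  # (10, 8) True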
Example 20
    def kernel(self, Pid, t):
        if self.variance_method == 4:
            covariance = self.variance[Pid]
        else:
            covariance = self.variance
        if np.linalg.det(covariance) < 1.E-15:
            # maybe a singular matrix: fall back to l2 shrinkage
            # with the Ledoit-Wolf estimator
            covariance, _ = ledoit_wolf(self.theta[t])
        return scipy.stats.multivariate_normal(
            mean=self.theta[t][Pid], cov=covariance,
            allow_singular=True).pdf
Example 21
    def get_var(self, t, params):
        '''Input:
               t: iteration level
               params: parameter vector for all particles from the previous iteration
           Returns:
               particle covariance with the Ledoit-Wolf estimator
        '''
        if self.start:
            return self.first_iter(t, params)
        else:
            var, _ = ledoit_wolf(params)
        return var
Example 22
def invcov_mah(mat, cov_est=1):
    if (cov_est == 0):
        mat = np.cov(np.matrix(mat).T)
    elif (cov_est == 1):
        mat = ledoit_wolf(mat)[0]
    elif (cov_est == 2):
        mat = diag_covmat(mat)
    try:
        icov_mat = np.linalg.inv(mat)
    except np.linalg.LinAlgError:
        icov_mat = np.linalg.pinv(mat)
    return icov_mat
Example 23
File: alloc.py Project: dxcv/pyetf
def rpw_lstm(prices,
             initial_weights=None,
             risk_weights=None,
             risk_parity_method='ccd',
             maximum_iterations=100,
             tolerance=1E-8,
             min_assets_number=2,
             max_assets_number=6):
    r = prices.to_returns().dropna()
    covar = ledoit_wolf(r)[0]
    for i in range(len(prices.columns)):
        var, _ = forecast_var_from_lstm(prices[prices.columns[i]])
        covar[i, i] = (var / 100.0)  # **(0.5)
    return covar
Example 24
def approximated_max_kelly(data):
    """Find a approximated solution of the portofolio based on kelly criterion."""
    returns_data = data.pct_change().dropna()
    _, delta = ledoit_wolf(data)
    sigma = CovarianceShrinkage(data).shrunk_covariance(delta=delta)
    mu = returns_data.mean(axis=0)
    A = 0.5 * sigma
    A = np.hstack((A, np.ones((sigma.shape[0], 1))))
    A = np.vstack((A, np.ones((1, sigma.shape[0] + 1))))
    A[-1, -1] = 0.0
    B = np.hstack((mu, [1]))
    w = np.dot(np.linalg.inv(A), B)
    return pd.DataFrame([w[:-1]], columns=data.columns)
Example 25
    def _estimate_asset_cov(self, trade_date):

        index_ids = self.index_ids

        df_index_inc = self._load_index_inc(trade_date)

        df_index_cov = pd.DataFrame(ledoit_wolf(df_index_inc.dropna(),
                                                assume_centered=False)[0],
                                    index=index_ids,
                                    columns=index_ids)
        ser_index_cov = df_index_cov.stack().rename(trade_date)

        return ser_index_cov
Example 26
def bhdist(mu1, mu2, mat1, mat2, reg=0):
    # Bhattacharyya distance, assuming normal distributions
    # expects columns to be observations and rows to be features
    diff_mn_mat = np.matrix(mu1 - mu2).T
    if (reg == 1):
        cov_mat1 = ledoit_wolf(mat1)[0]
        cov_mat2 = ledoit_wolf(mat2)[0]
    else:
        cov_mat1 = np.cov(mat1)
        cov_mat2 = np.cov(mat2)
    cov_mat_mn = (cov_mat1 + cov_mat2) / 2
    icov_mat_mn = np.linalg.inv(cov_mat_mn)
    term1 = np.dot(np.dot(diff_mn_mat.T, icov_mat_mn), diff_mn_mat) / 8
    (sign1, logdet1) = np.linalg.slogdet(cov_mat1)
    (sign2, logdet2) = np.linalg.slogdet(cov_mat2)
    (sign_mn, logdet_mn) = np.linalg.slogdet(cov_mat_mn)
    ln_det_mat1 = logdet1
    ln_det_mat2 = logdet2
    ln_det_mat_mn = logdet_mn
    term2 = (ln_det_mat_mn / 2) - (ln_det_mat1 + ln_det_mat2) / 4
    result = term1 + term2
    return result[0, 0]
Example 27
def invcov_mah(mat, cov_est=1):
    # Regularises the covariance matrix used in the Mahalanobis and
    # Bhattacharyya distances (via the cov_est setting), then computes
    # either the inverse or the pseudo-inverse covariance matrix.
    if (cov_est == 0):
        mat = np.cov(np.matrix(mat).T)
    elif (cov_est == 1):
        mat = ledoit_wolf(mat)[0]
    elif (cov_est == 2):
        mat = diag_covmat(mat)
    try:
        icov_mat = np.linalg.inv(mat)
    except np.linalg.LinAlgError:
        icov_mat = np.linalg.pinv(mat)
    return icov_mat
Example 28
    def _initialize(self, X):
        n_samples, observed_dimensions = X.shape
        kmeans = KMeans(self._n_components, n_init=self._n_init)
        lab = kmeans.fit(X).predict(X)
        self._covs = []
        for i in range(self._n_components):
            cl_indxs = np.where(lab == i)[0]
            rnd_indxs = np.random.choice(range(n_samples), size=5)
            indx = np.concatenate([cl_indxs, rnd_indxs])
            # Ledoit-Wolf shrinkage avoids a singular covariance estimate
            self._covs.append(ledoit_wolf(X[indx])[0])
        self._pi = np.ones(self._n_components) / self._n_components
        self._log_pi = np.log(self._pi)
        self._mus = np.array(kmeans.cluster_centers_)
Example 29
def var_info(mat1, mat2, reg=0):
    # Variation of Information, assuming normal distributions
    # expects rows to be observations and columns to be features
    # expects the same number of observations in both matrices
    # needs checking...
    n = mat1.shape[0]
    mat0 = np.hstack([mat1, mat2])
    if (reg == 1):
        cov_mat0 = ledoit_wolf(mat0)[0]
        cov_mat1 = ledoit_wolf(mat1)[0]
        cov_mat2 = ledoit_wolf(mat2)[0]
    else:
        cov_mat0 = np.cov(mat0)
        cov_mat1 = np.cov(mat1)
        cov_mat2 = np.cov(mat2)
    sign0, logdet0 = np.linalg.slogdet(cov_mat0 * 2 * np.exp(1) * np.pi)
    logdet0 *= 0.5 * n
    sign1, logdet1 = np.linalg.slogdet(cov_mat1 * 2 * np.exp(1) * np.pi)
    logdet1 *= 0.5 * n
    sign2, logdet2 = np.linalg.slogdet(cov_mat2 * 2 * np.exp(1) * np.pi)
    logdet2 *= 0.5 * n
    MI = (logdet1 + logdet2 - logdet0)
    return logdet0 - MI
Example 30
def rpw_future(prices,
               initial_weights=None,
               risk_weights=None,
               risk_parity_method='ccd',
               maximum_iterations=100,
               tolerance=1E-8,
               min_assets_number=2,
               max_assets_number=6):
    r = prices.to_returns().dropna()
    covar = ledoit_wolf(r)[0]
    for i in range(len(r.columns)):
        _, var = future_mean_var(prices[prices.columns[i]].values)
        covar[i, i] = var * 100
    return covar
Example 31
    def test_ledoit_wolf(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.covariance.ledoit_wolf()
        expected = covariance.ledoit_wolf(iris.data)

        self.assertEqual(len(result), 2)

        self.assertIsInstance(result[0], pdml.ModelFrame)
        tm.assert_index_equal(result[0].index, df.data.columns)
        tm.assert_index_equal(result[0].columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result[0].values, expected[0])

        self.assert_numpy_array_almost_equal(result[1], expected[1])
Example 32
    def test_ledoit_wolf(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.covariance.ledoit_wolf()
        expected = covariance.ledoit_wolf(iris.data)

        self.assertEqual(len(result), 2)

        self.assertTrue(isinstance(result[0], pdml.ModelFrame))
        self.assert_index_equal(result[0].index, df.data.columns)
        self.assert_index_equal(result[0].columns, df.data.columns)
        self.assert_numpy_array_almost_equal(result[0].values, expected[0])

        self.assert_numpy_array_almost_equal(result[1], expected[1])
Example 33
def get_covariance_estimator(estimator):
    if hasattr(estimator, "__call__"):
        f = estimator
    elif type(estimator) == str:
        if estimator == "MCD" or estimator == "mcd" or estimator == "MinCovDet" or estimator == "fast_mcd":
            f = fast_mcd
        elif estimator == "Ledoit-Wolf" or estimator == "LW" or estimator == "lw":
            f = lambda x: ledoit_wolf(x)[0]
        elif estimator == "OAS" or estimator == "oas":
            f = lambda x: oas(x)[0]
        else:
            f = empirical_covariance
    else:
        f = empirical_covariance

    return f
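A usage sketch; the string aliases resolve to estimator callables ('lw' needs ledoit_wolf in scope, 'oas' needs oas, the MCD aliases need fast_mcd), and anything callable passes through unchanged:

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))

cov_fn = get_covariance_estimator('lw')   # -> lambda x: ledoit_wolf(x)[0]
print(cov_fn(X).shape)                    # (5, 5)

my_fn = get_covariance_estimator(lambda x: np.cov(x, rowvar=False))
print(my_fn(X).shape)                     # custom callables pass through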
Example 34
def correlation(X):
    "Compute correlation matrix"
    X = X - X.mean(axis=0)
    X /= X.std(axis=0)
    cov, _ = covariance.ledoit_wolf(X) # To have robust correlations, use MCD, Minimum cov determinant, instead
    return cov
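Because the data are standardized first, the Ledoit-Wolf estimate here is effectively a shrunk correlation matrix: diagonal close to one, off-diagonals pulled toward zero. A quick check, assuming correlation plus numpy and sklearn's covariance module are in scope:

import numpy as np
from sklearn import covariance

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 4))
X[:, 1] += 0.8 * X[:, 0]           # induce correlation between columns 0 and 1

corr = correlation(X)
print(np.round(np.diag(corr), 2))  # approximately ones on the diagonal
print(round(corr[0, 1], 2))        # clearly positive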
Example 35
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_
    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                assume_centered=True, block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # (too) large data set
    X_large = np.ones((20, 200))
    assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    X_1sample = np.arange(5)
    lw = LedoitWolf()
    with warnings.catch_warnings(record=True):
        lw.fit(X_1sample)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Example 36
prec *= d
prec *= d[:, np.newaxis]
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)

##############################################################################
# Estimate the covariance
emp_cov = np.dot(X.T, X) / n_samples

model = GraphLassoCV()
model.fit(X)
cov_ = model.covariance_
prec_ = model.precision_

lw_cov_, _ = ledoit_wolf(X)
lw_prec_ = linalg.inv(lw_cov_)

##############################################################################
# Plot the results
pl.figure(figsize=(10, 6))
pl.subplots_adjust(left=0.02, right=0.98)

# plot the covariances
covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
        ('GraphLasso', cov_), ('True', cov)]
vmax = cov_.max()
for i, (name, this_cov) in enumerate(covs):
    pl.subplot(2, 4, i + 1)
    pl.imshow(this_cov, interpolation='nearest', vmin=-vmax, vmax=vmax,
              cmap=pl.cm.RdBu_r)
Example 37
def test_ledoit_wolf():
    # Tests LedoitWolf module on a simple dataset.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_centered)
    shrinkage_ = lw.shrinkage_

    score_ = lw.score(X_centered)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered,
                                              assume_centered=True),
                        shrinkage_)
    assert_almost_equal(ledoit_wolf_shrinkage(X_centered, assume_centered=True,
                                              block_size=6),
                        shrinkage_)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_centered,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf(assume_centered=True)
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False, assume_centered=True)
    lw.fit(X_centered)
    assert_almost_equal(lw.score(X_centered), score_, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, shrinkage_, 4)
    assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X))
    assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1])
    assert_almost_equal(lw.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    lw = LedoitWolf()
    assert_warns(UserWarning, lw.fit, X_1sample)
    assert_array_almost_equal(lw.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), score_, 4)
    assert(lw.precision_ is None)
Example 38
def benchmark3():
    """Compare group_sparse_covariance result for different initializations.
    """
    ## parameters = {'n_tasks': 10, 'n_var': 50, 'density': 0.15,
    ##               'alpha': .001, 'tol': 1e-2, 'max_iter': 100}
    parameters = {'n_var': 40, 'n_tasks': 10, 'density': 0.15,
                  'alpha': .01, 'tol': 1e-3, 'max_iter': 100}

    mem = joblib.Memory(".")

    _, _, gt = create_signals(parameters,
                              output_dir="_prof_group_sparse_covariance")
    signals = gt["signals"]

    emp_covs, n_samples = empirical_covariances(signals)
    print("alpha max: " + str(compute_alpha_max(emp_covs, n_samples)))

    # With diagonal elements initialization
    probe1 = ScoreProbe()
    est_precs1, probe1 = mem.cache(modified_gsc)(signals, parameters, probe1)
    probe1.comment = "diagonal"  # set after execution for joblib not to see it
    probe1.plot()

    # With Ledoit-Wolf initialization
    ld = np.empty(emp_covs.shape)
    for k in range(emp_covs.shape[-1]):
        ld[..., k] = np.linalg.inv(ledoit_wolf(signals[k])[0])

    probe1 = ScoreProbe()
    est_precs1, probe1 = utils.timeit(mem.cache(modified_gsc))(
        signals, parameters, probe=probe1)
    probe1.comment = "diagonal"  # for joblib to ignore this value

    probe2 = ScoreProbe()
    parameters["precisions_init"] = ld
    est_precs2, probe2 = utils.timeit(mem.cache(modified_gsc))(
        signals, parameters, probe=probe2)
    probe2.comment = "ledoit-wolf"

    print("difference between final estimates (max norm) %.2e"
          % abs(est_precs1 - est_precs2).max())

    pl.figure()
    pl.semilogy(probe1.timings[1:], probe1.max_norm,
                "+-", label=probe1.comment)
    pl.semilogy(probe2.timings[1:], probe2.max_norm,
                "+-", label=probe2.comment)
    pl.xlabel("Time [s]")
    pl.ylabel("Max norm")
    pl.grid()
    pl.legend(loc="best")

    pl.figure()
    pl.plot(probe1.timings, probe1.objective,
            "+-", label=probe1.comment)
    pl.plot(probe2.timings, probe2.objective,
            "+-", label=probe2.comment)
    pl.xlabel("Time [s]")
    pl.ylabel("objective")
    pl.grid()
    pl.legend(loc="best")

    pl.show()
Example 39
def _lwf(X):
    """Wrapper for the sklearn ledoit_wolf covariance estimator.

    Expects X with shape (n_features, n_samples), hence the transpose.
    """
    C, _ = ledoit_wolf(X.T)
    return C
Example 40
def test_ledoit_wolf():
    """Tests LedoitWolf module on a simple dataset.

    """
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.shrinkage_, 0.00192, 4)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d, assume_centered=True)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d,
                                                         assume_centered=True)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X, assume_centered=True)
    assert_almost_equal(lw.score(X, assume_centered=True), -2.89795, 4)
    assert(lw.precision_ is None)

    # Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    lw = LedoitWolf()
    lw.fit(X)
    assert_almost_equal(lw.shrinkage_, 0.007582, 4)
    assert_almost_equal(lw.score(X), 2.243483, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    # compare estimates given by LW and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=lw.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, lw.covariance_, 4)

    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    lw = LedoitWolf()
    lw.fit(X_1d)
    lw_cov_from_mle, lw_shrinkage_from_mle = ledoit_wolf(X_1d)
    assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4)
    assert_almost_equal(lw_shrinkage_from_mle, lw.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4)

    # test shrinkage coeff on a simple data set (without saving precision)
    lw = LedoitWolf(store_precision=False)
    lw.fit(X)
    assert_almost_equal(lw.score(X), 2.2434839, 4)
    assert(lw.precision_ is None)