예제 #1
0
def min_cov_determinant(prices, frequency=252, random_state=None):
    """
    Estimate an annualised covariance matrix using the minimum covariance
    determinant (MCD) method, as computed by ``covariance.fast_mcd``.

    Prices are turned into period-over-period percentage changes, rows that are
    entirely NaN are dropped, and whatever NaNs remain are sanitised with
    ``np.nan_to_num`` before the estimator is run.

    :param prices: adjusted closing prices, one row per date and one column
                   per ticker/id. A non-DataFrame input is wrapped in a
                   DataFrame after a ``RuntimeWarning`` is emitted.
    :type prices: pd.DataFrame
    :param frequency: number of time periods in a year, used to scale the
                      estimate; defaults to 252 (trading days in a year)
    :type frequency: int, optional
    :param random_state: seed forwarded to ``fast_mcd`` so results can be
                         reproduced, defaults to None
    :type random_state: int, optional
    :return: covariance estimate multiplied by ``frequency``, with the columns
             of ``prices`` as both index and columns
    :rtype: pd.DataFrame
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)

    tickers = prices.columns

    # The first pct_change row is always all-NaN; dropna(how="all") removes it.
    returns = prices.pct_change().dropna(how="all")
    clean_returns = np.nan_to_num(returns.values)

    # fast_mcd returns a tuple; the covariance matrix is its second element.
    mcd_result = covariance.fast_mcd(clean_returns, random_state=random_state)
    period_cov = pd.DataFrame(mcd_result[1], index=tickers, columns=tickers)
    return period_cov * frequency
예제 #2
0
def _mcd(X):
    """Estimate a covariance matrix with sklearn's fast MCD algorithm.

    Parameters
    ----------
    X : ndarray
        EEG signal, shape (n_channels, n_samples).

    Returns
    -------
    C : ndarray
        Estimated covariance, shape (n_channels, n_channels).
    """
    # fast_mcd is given the transpose of X, i.e. one row per sample.
    samples = X.T
    _location, cov_estimate, _support, _dist = fast_mcd(samples)
    return cov_estimate
예제 #3
0
def _assert_mcd_estimates(data, pure_data, T, S, H, n_samples, n_features,
                          n_outliers, tol_loc, tol_cov, tol_support,
                          emp_cov=None):
    """Assert that one set of MCD estimates (T, S, H) meets the tolerances.

    Returns the empirical covariance fitted on ``pure_data`` so the caller can
    reuse it for a second round of checks instead of refitting it.
    """
    # Location must be close to the mean of the uncontaminated samples.
    error_location = np.sum((pure_data.mean(0) - T) ** 2)
    assert error_location < tol_loc
    # Reference covariance learnt from the inliers only (fitted at most once).
    if emp_cov is None:
        emp_cov = EmpiricalCovariance().fit(pure_data)
    assert emp_cov.error_norm(S) < tol_cov
    assert np.sum(H) > tol_support
    # With enough contamination, the robust estimates must beat the naive
    # estimates computed on the full (contaminated) data set.
    if (n_outliers / float(n_samples) > 0.1) and (n_features > 1):
        error_bad_location = np.sum((data.mean(0) - T) ** 2)
        assert error_bad_location > error_location
        bad_emp_cov = EmpiricalCovariance().fit(data)
        assert emp_cov.error_norm(S) < bad_emp_cov.error_norm(S)
    return emp_cov


def launch_mcd_on_dataset(n_samples, n_features, n_outliers,
                          tol_loc, tol_cov, tol_support, correction):
    """Check MCD estimates on a random data set contaminated with outliers.

    Standard-normal data of shape (n_samples, n_features) is drawn from the
    global ``np.random`` state, and ``n_outliers`` randomly chosen rows are
    shifted by +5 or -5 in every feature. The location, covariance and support
    are then estimated twice -- once with ``fast_mcd`` and once by fitting an
    ``MCD`` object -- and both results are checked with assertions against
    estimates computed from the inliers alone.

    Parameters
    ----------
    n_samples : int
        Number of rows in the generated data set.
    n_features : int
        Number of columns in the generated data set.
    n_outliers : int
        Number of rows that receive the outlier offset.
    tol_loc : float
        Strict upper bound on the squared error between the estimated location
        and the mean of the inliers.
    tol_cov : float
        Strict upper bound on ``error_norm`` between the empirical covariance
        of the inliers and the estimated covariance.
    tol_support : float
        Strict lower bound on the number of samples in the estimated support.
    correction
        Forwarded unchanged to ``fast_mcd`` as its ``correction`` argument.

    Raises
    ------
    AssertionError
        If any estimate violates its tolerance or, when more than 10% of the
        samples are outliers and ``n_features > 1``, if the robust estimates
        are not better than the naive ones computed on all the data.
    """
    data = np.random.randn(n_samples, n_features)
    # add some outliers: each offset component is either -5 or +5
    outliers_index = np.random.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (np.random.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples, dtype=bool)
    inliers_mask[outliers_index] = False
    pure_data = data[inliers_mask]

    # compute MCD directly; the unpacking expects exactly three return values
    T, S, H = fast_mcd(data, correction=correction)
    emp_cov = _assert_mcd_estimates(
        data, pure_data, T, S, H, n_samples, n_features, n_outliers,
        tol_loc, tol_cov, tol_support)

    # compute MCD by fitting an object and run the very same checks
    mcd_fit = MCD().fit(data)
    _assert_mcd_estimates(
        data, pure_data, mcd_fit.location_, mcd_fit.covariance_,
        mcd_fit.support_, n_samples, n_features, n_outliers,
        tol_loc, tol_cov, tol_support, emp_cov=emp_cov)
예제 #4
0
def min_cov_determinant(prices, frequency=252, random_state=None):
    """
    Compute the minimum covariance determinant (MCD) estimate of the asset
    covariance matrix and annualise it.

    The estimator is run on percentage changes of ``prices``: rows consisting
    only of NaN are removed and any leftover NaNs go through
    ``np.nan_to_num`` before ``covariance.fast_mcd`` is called.

    :param prices: adjusted closing prices of the assets; each row is a date
                   and each column is a ticker/id. If it is not a DataFrame, a
                   ``RuntimeWarning`` is issued and it is converted to one.
    :type prices: pd.DataFrame
    :param frequency: number of time periods in a year, defaults to 252 (the
                      number of trading days in a year)
    :type frequency: int, optional
    :param random_state: random seed passed on to ``fast_mcd`` to make results
                         reproducible, defaults to None
    :type random_state: int, optional
    :return: the covariance estimate scaled by ``frequency``, labelled on both
             axes with the columns of ``prices``
    :rtype: pd.DataFrame
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)

    labels = prices.columns
    pct_returns = prices.pct_change().dropna(how="all")
    return_matrix = np.nan_to_num(pct_returns.values)

    # Index 1 of the tuple returned by fast_mcd holds the covariance matrix.
    estimate = covariance.fast_mcd(return_matrix, random_state=random_state)[1]
    labelled_cov = pd.DataFrame(estimate, index=labels, columns=labels)
    return labelled_cov * frequency
예제 #5
0
def _mcd(X):
    """Return the covariance estimated by sklearn's fast MCD routine.

    ``X`` is transposed before being handed to ``fast_mcd``, so it is
    presumably laid out as (n_features, n_samples) -- confirm against callers.
    """
    transposed = X.T
    # Only the second of the four values returned by fast_mcd is kept.
    _location, cov_estimate, _support, _dist = fast_mcd(transposed)
    return cov_estimate
예제 #6
0
def _mcd(X):
    """Run sklearn's ``fast_mcd`` on ``X.T`` and return its covariance output.

    NOTE(review): the transpose suggests ``X`` has one row per variable and
    one column per observation -- verify against the callers.
    """
    # fast_mcd is unpacked into four values; the covariance is the second.
    _loc, robust_cov, _mask, _distances = fast_mcd(X.T)
    return robust_cov
예제 #7
0
def test_fast_mcd_on_invalid_input():
    """fast_mcd must raise a ValueError when given a one-dimensional array."""
    one_dimensional = np.arange(100)
    expected_message = "Expected 2D array, got 1D array instead"
    with pytest.raises(ValueError, match=expected_message):
        fast_mcd(one_dimensional)