def min_cov_determinant(prices, frequency=252, random_state=None):
    """
    Estimate an annualised covariance matrix with the minimum covariance
    determinant (MCD) estimator, a robust alternative to the sample covariance.

    Prices are converted to percentage returns; rows in which every return is
    missing are dropped, and any remaining NaNs are replaced by zero before
    the estimator is run.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id.
    :type prices: pd.DataFrame
    :param frequency: number of time periods in a year, defaults to 252
                      (the number of trading days in a year)
    :type frequency: int, optional
    :param random_state: random seed forwarded to the MCD estimator to make
                         results reproducible, defaults to None
    :type random_state: int, optional
    :return: annualised estimate of covariance matrix
    :rtype: pd.DataFrame
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)

    tickers = prices.columns
    returns = prices.pct_change().dropna(how="all")
    # fast_mcd needs a plain numeric array, so gaps left in the returns are
    # zero-filled rather than propagated as NaN.
    clean_returns = np.nan_to_num(returns.values)

    # fast_mcd returns a tuple; the covariance estimate is its second entry.
    mcd_result = covariance.fast_mcd(clean_returns, random_state=random_state)
    per_period_cov = pd.DataFrame(mcd_result[1], index=tickers, columns=tickers)
    return per_period_cov * frequency
def _mcd(X):
    """Estimate a covariance matrix with sklearn's fast MCD routine.

    Parameters
    ----------
    X : ndarray
        EEG signal, shape (n_channels, n_samples).

    Returns
    -------
    C : ndarray
        Estimated covariance, shape (n_channels, n_channels).
    """
    # fast_mcd expects one observation per row, hence the transpose.
    samples_first = X.T
    _, cov, _, _ = fast_mcd(samples_first)
    return cov
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc,
                          tol_cov, tol_support, correction):
    """Check MCD estimates on a Gaussian dataset contaminated with outliers.

    A standard-normal dataset of shape (n_samples, n_features) is drawn and
    n_outliers randomly chosen rows are shifted by +5 or -5 on every feature.
    The MCD location, covariance and support are then obtained twice -- once
    through ``fast_mcd`` and once by fitting an ``MCD`` object -- and each
    result is compared with estimates computed from the inliers only.

    Parameters
    ----------
    n_samples, n_features : int
        Shape of the generated dataset.
    n_outliers : int
        Number of rows turned into outliers.
    tol_loc : float
        Upper bound on the squared distance between the MCD location and the
        mean of the inliers.
    tol_cov : float
        Upper bound on the error norm between the inlier empirical covariance
        and the MCD covariance.
    tol_support : float
        Lower bound on the number of samples in the MCD support.
    correction
        Forwarded unchanged to ``fast_mcd``.
    """
    data = np.random.randn(n_samples, n_features)
    # Contaminate a random subset of rows: randint gives 0/1, so each
    # coordinate of the offset is either -5 or +5.
    outliers_index = np.random.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * (
        np.random.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    def squared_location_error(samples, location):
        # Squared Euclidean distance between the sample mean and `location`.
        return np.sum((samples.mean(0) - location) ** 2)

    def check_improvement(location, cov, clean_error):
        # With enough outliers (and more than one feature) the MCD estimates
        # must beat the plain estimates computed on the contaminated data.
        if (n_outliers / float(n_samples) > 0.1) and (n_features > 1):
            error_bad_location = squared_location_error(data, location)
            assert error_bad_location > clean_error
            bad_emp_cov = EmpiricalCovariance().fit(data)
            assert emp_cov.error_norm(cov) < bad_emp_cov.error_norm(cov)

    # --- MCD computed directly -------------------------------------------
    T, S, H = fast_mcd(data, correction=correction)
    # Reference estimates come from the inliers only.
    pure_data = data[inliers_mask]
    error_location = squared_location_error(pure_data, T)
    assert error_location < tol_loc
    emp_cov = EmpiricalCovariance().fit(pure_data)
    assert emp_cov.error_norm(S) < tol_cov
    assert np.sum(H) > tol_support
    check_improvement(T, S, error_location)

    # --- MCD computed by fitting an object -------------------------------
    mcd_fit = MCD().fit(data)
    T, S, H = mcd_fit.location_, mcd_fit.covariance_, mcd_fit.support_
    error_location = squared_location_error(pure_data, T)
    assert error_location < tol_loc
    assert emp_cov.error_norm(S) < tol_cov
    assert np.sum(H) > tol_support
    check_improvement(T, S, error_location)
def min_cov_determinant(prices, frequency=252, random_state=None):
    """
    Compute the minimum covariance determinant (MCD) estimate of the
    covariance of asset returns and scale it to an annual figure.

    Returns are obtained as percentage changes of the prices. Dates on which
    no asset has a return are discarded; NaNs that remain are set to zero
    before the MCD fit.

    :param prices: adjusted closing prices of the asset, each row is a date
                   and each column is a ticker/id. Non-DataFrame input is
                   wrapped in a DataFrame after emitting a RuntimeWarning.
    :type prices: pd.DataFrame
    :param frequency: number of time periods in a year, defaults to 252
                      (the number of trading days in a year)
    :type frequency: int, optional
    :param random_state: random seed to make results reproducible,
                         defaults to None
    :type random_state: int, optional
    :return: annualised estimate of covariance matrix
    :rtype: pd.DataFrame
    """
    if isinstance(prices, pd.DataFrame):
        price_frame = prices
    else:
        warnings.warn("prices are not in a dataframe", RuntimeWarning)
        price_frame = pd.DataFrame(prices)

    labels = price_frame.columns
    return_values = price_frame.pct_change().dropna(how="all").values
    filled_returns = np.nan_to_num(return_values)

    # Index 1 of fast_mcd's result is the robust covariance matrix.
    robust_cov = covariance.fast_mcd(
        filled_returns, random_state=random_state
    )[1]
    return frequency * pd.DataFrame(robust_cov, index=labels, columns=labels)
def _mcd(X):
    """Return the covariance estimated by sklearn's ``fast_mcd``.

    ``X`` is transposed before the call, so its columns are treated as the
    observations and its rows as the variables. Of the four values that
    ``fast_mcd`` returns, only the covariance matrix (the second) is kept.
    """
    mcd_outputs = fast_mcd(X.T)
    _, robust_cov, _, _ = mcd_outputs
    return robust_cov
def _mcd(X):
    """Minimum Covariance Determinant covariance via sklearn's ``fast_mcd``.

    The input is passed transposed (``X.T``); the location, support and
    distance outputs of ``fast_mcd`` are discarded and only the covariance
    matrix is returned.
    """
    (_location, C, _support, _distances) = fast_mcd(X.T)
    return C
def test_fast_mcd_on_invalid_input():
    """fast_mcd must raise ValueError with the 2D-array message on 1D input."""
    one_dimensional = np.arange(100)
    expected_message = "Expected 2D array, got 1D array instead"
    with pytest.raises(ValueError, match=expected_message):
        fast_mcd(one_dimensional)