def fallback_covariance(time_series):
    """Estimate a connectivity matrix with shrinkage estimators as a fallback.

    Samples flagged as outliers by an ``IsolationForest`` (2% contamination)
    are dropped from ``time_series``. The connectivity matrix is then
    estimated with a Ledoit-Wolf covariance estimator and, if that fails
    numerically, with the Oracle Approximating Shrinkage (OAS) estimator.

    ``kind``, ``ConnectivityMeasure`` and ``np`` are not defined in this
    function; they are resolved from the enclosing/module scope.

    :param time_series: array with samples along the first axis (it is passed
        to ``IsolationForest.fit`` and indexed with a boolean row mask).
    :return: first element of ``ConnectivityMeasure.fit_transform`` applied to
        the outlier-free time series.
    :raises ValueError: if both the Ledoit-Wolf and the OAS estimation raise
        ``LinAlgError`` or ``FloatingPointError``.
    """
    from sklearn.ensemble import IsolationForest
    from sklearn import covariance

    # Remove gross outliers: predict() labels inliers +1 and outliers -1.
    model = IsolationForest(contamination=0.02)
    model.fit(time_series)
    inlier_mask = model.predict(time_series) == 1
    time_series = time_series[inlier_mask]

    # Fall back to LedoitWolf
    print('Matrix estimation failed with Lasso and shrinkage due to '
          'ill conditions. Removing potential anomalies from the '
          'time-series using IsolationForest...')

    # Estimators are built lazily so OAS is only constructed when needed.
    candidates = (
        ("Trying Ledoit-Wolf Estimator...",
         lambda: covariance.LedoitWolf(store_precision=True,
                                       assume_centered=True)),
        ("Trying Oracle Approximating Shrinkage Estimator...",
         lambda: covariance.OAS(assume_centered=True)),
    )
    for message, make_estimator in candidates:
        print(message)
        conn_measure = ConnectivityMeasure(cov_estimator=make_estimator(),
                                           kind=kind)
        try:
            return conn_measure.fit_transform([time_series])[0]
        # np.linalg.LinAlgError is the public name; the np.linalg.linalg
        # module path is private and deprecated in NumPy 2.0.
        except (np.linalg.LinAlgError, FloatingPointError):
            continue

    raise ValueError('All covariance estimators failed to '
                     'converge...')
def __init__(self, window=float('inf'), mu_estimator=None, cov_estimator=None, min_history=None,
             max_leverage=1., method='mpt', q=0.01, gamma=0., allow_cash=False, **kwargs):
    """
    :param window: Window for calculating mean and variance. Use float('inf') for entire history.
    :param mu_estimator: Either a name ('historical' or 'sharpe') or an estimator object that is
        used as given. Defaults to MuEstimator().
    :param cov_estimator: Either a name ('empirical', 'ledoit-wolf', 'graph-lasso' or 'oas') or an
        estimator object. sklearn BaseEstimator instances are wrapped in CovarianceEstimator,
        anything else is used as given. Defaults to 'empirical'.
    :param min_history: Use zero weights for first min_periods. Defaults to 50 for an infinite
        window, otherwise to the window itself.
    :param max_leverage: Max leverage to use.
    :param method: optimization objective - can be "mpt", "sharpe" and "variance"
    :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
    :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
    :param allow_cash: Allow holding cash (weights doesn't have to sum to 1)
    :raises NotImplementedError: if an estimator name is not one of the supported names.
    """
    if np.isinf(window):
        # an infinite window is represented by a very large integer
        window = int(1e+8)
        min_history = min_history or 50
    else:
        min_history = min_history or window

    super(MPT, self).__init__(min_history=min_history, **kwargs)
    self.window = window
    self.max_leverage = max_leverage
    self.method = method
    self.q = q
    self.gamma = gamma
    self.allow_cash = allow_cash

    if cov_estimator is None:
        cov_estimator = 'empirical'

    # resolve estimator names to sklearn covariance estimators
    if isinstance(cov_estimator, basestring):
        if cov_estimator == 'empirical':
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == 'ledoit-wolf':
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == 'graph-lasso':
            # scikit-learn 0.20 renamed GraphLasso to GraphicalLasso and
            # removed the old name in 0.22; support both.
            graph_lasso = getattr(covariance, 'GraphicalLasso', None) or covariance.GraphLasso
            cov_estimator = graph_lasso()
        elif cov_estimator == 'oas':
            cov_estimator = covariance.OAS()
        else:
            # NotImplemented is a non-callable constant, not an exception
            raise NotImplementedError('Unknown covariance estimator {}'.format(cov_estimator))

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator)

    if mu_estimator is None:
        mu_estimator = MuEstimator()

    if isinstance(mu_estimator, basestring):
        if mu_estimator == 'historical':
            mu_estimator = HistoricalEstimator(window)
        elif mu_estimator == 'sharpe':
            mu_estimator = MuEstimator()
        else:
            raise NotImplementedError('Unknown mu estimator {}'.format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
def shrinkage():
    """Explore shrinkage and sparse covariance estimators on ``data_train``.

    Reads the module-level names ``cov_train_pyr``, ``cov_train`` and
    ``data_train`` (plus ``plt``, ``covariance``, ``cov2corr`` and ``np``).
    It first plots ``cov_train_pyr[0]``. Then, for every entry of
    ``data_train``, it:

    * fits an OAS estimator (result not used further),
    * fits a 5-fold cross-validated graphical lasso once, converts its
      precision matrix with ``cov2corr``, prints row sums of absolute values,
      the CV alphas and mean grid scores, and plots the converted matrix,
    * stores the Ledoit-Wolf covariance estimate in ``cov_train[i]``.

    Intermediate estimates are kept in local variables; only the final
    Ledoit-Wolf estimate is written to ``cov_train``. Returns ``None``.
    """
    plt.imshow(cov_train_pyr[0])
    plt.colorbar()
    plt.show()

    for i, samples in enumerate(data_train):
        # OAS fit kept from the original exploration; its result is unused.
        covariance.OAS().fit(samples)

        # Fit once and reuse the fitted model instead of refitting for
        # every attribute that is read from it.
        glasso_cv = covariance.GraphLassoCV(cv=5).fit(samples)
        corr_lasso = cov2corr(glasso_cv.precision_)
        print('sum of correlations: ', np.sum(np.abs(corr_lasso), axis=1))

        print(glasso_cv.cv_alphas_)
        print(np.mean(glasso_cv.grid_scores_, axis=1))

        plt.imshow(corr_lasso)
        plt.colorbar()
        plt.show()

        cov_train[i] = covariance.LedoitWolf().fit(samples).covariance_
def _train(self, train_data, params, verbose):
    """Fit one OAS covariance estimate per element of ``train_data``.

    :param train_data: iterable of data sets; each one is passed to
        ``sklearn.covariance.OAS.fit``.
    :param params: not used by this method.
    :param verbose: when truthy, print the model name before training and the
        elapsed time afterwards.
    :return: tuple ``(covs, None)`` where ``covs`` is the list of fitted
        ``covariance_`` matrices, in input order.
    """
    import sklearn.covariance as sk_cov

    if verbose:
        print("Training {} ...".format(self.name))

    started = time.time()
    # OAS.fit returns the estimator itself, so the fit can be chained.
    covs = [sk_cov.OAS().fit(sample).covariance_ for sample in train_data]
    elapsed = time.time() - started

    if verbose:
        print("\tElapsed time {:.1f}s".format(elapsed))
    return covs, None
def __init__(self, dim, estimator='OAS', **kwargs):
    """Create the parameters object and select its covariance estimator.

    :param dim: forwarded to the parent constructor.
    :param estimator: name of the ``covariance`` estimator class to use; one of
        'EmpiricalCovariance', 'LedoitWolf', 'MinCovDet', 'OAS' or
        'ShrunkCovariance'. The estimator is created with
        ``assume_centered=True``.
    :param kwargs: forwarded to the parent constructor.
    :raises ValueError: if ``estimator`` is not one of the supported names.
    """
    super(SKGaussianParams, self).__init__(dim, **kwargs)

    supported = ('EmpiricalCovariance', 'LedoitWolf', 'MinCovDet', 'OAS',
                 'ShrunkCovariance')
    for class_name in supported:
        if estimator == class_name:
            # look the class up only for the requested name
            estimator_cls = getattr(covariance, class_name)
            self._estimator = estimator_cls(assume_centered=True)
            break
    else:
        raise ValueError('Unknown estimator: {}'.format(estimator))
def computeCovar(bed, shrinkMethod, fitIndividuals):
    """Compute the (optionally shrunk) matrix XXT from ``bed.val``.

    :param bed: object with a 2D array attribute ``val``; rows are selected
        with ``fitIndividuals`` (presumably individuals x SNPs -- confirm
        against the caller).
    :param shrinkMethod: selects the estimator:
        'lw' (Ledoit-Wolf), 'oas', 'l1' (graphical lasso with cross-validated
        alpha) or 'cv' (grid search over the shrinkage coefficient) use
        scikit-learn; 'mylw' computes a shrinkage coefficient directly from
        the kinship matrix; a value convertible to float is used as the
        shrinkage coefficient; anything else applies no shrinkage.
    :param fitIndividuals: row selector (index or mask) of the individuals
        used to fit the shrinkage parameter.
    :return: for the scikit-learn methods, the fitted covariance multiplied by
        ``bed.val.shape[1]``; otherwise ``symmetrize(dsyrk(1.0, bed.val))``,
        shrunk towards a scaled identity when the shrinkage coefficient is
        positive.
    """
    if (shrinkMethod in ['lw', 'oas', 'l1', 'cv']):
        import sklearn.covariance as cov
        t0 = time.time()
        print('Estimating shrunk covariance using %s estimator...' % (shrinkMethod,))

        if (shrinkMethod == 'lw'):
            covEstimator = cov.LedoitWolf(assume_centered=True,
                                          block_size=5 * bed.val.shape[0])
        elif (shrinkMethod == 'oas'):
            covEstimator = cov.OAS(assume_centered=True)
        elif (shrinkMethod == 'l1'):
            # scikit-learn 0.20 renamed GraphLassoCV to GraphicalLassoCV
            graphLassoCV = getattr(cov, 'GraphicalLassoCV', None) or cov.GraphLassoCV
            covEstimator = graphLassoCV(assume_centered=True, verbose=True)
        elif (shrinkMethod == 'cv'):
            # GridSearchCV lives in sklearn.model_selection since 0.18;
            # sklearn.grid_search only exists in older releases.
            try:
                from sklearn.model_selection import GridSearchCV
            except ImportError:
                from sklearn.grid_search import GridSearchCV
            shrunkEstimator = cov.ShrunkCovariance(assume_centered=True)
            param_grid = {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]}
            covEstimator = GridSearchCV(shrunkEstimator, param_grid)
        else:
            raise Exception('unknown covariance regularizer')

        # The regularization strength is fitted on the selected individuals
        # only, then applied to an estimator fitted on all of them.
        covEstimator.fit(bed.val[fitIndividuals, :].T)
        if (shrinkMethod == 'l1'):
            alpha = covEstimator.alpha_
            print('l1 alpha chosen: %s' % (alpha,))
            graphLasso = getattr(cov, 'GraphicalLasso', None) or cov.GraphLasso
            covEstimator2 = graphLasso(alpha=alpha, assume_centered=True,
                                       verbose=True)
        else:
            if (shrinkMethod == 'cv'):
                # read the best parameter from the fitted grid search
                shrinkEstimator = covEstimator.best_params_['shrinkage']
            else:
                shrinkEstimator = covEstimator.shrinkage_
            print('shrinkage estimator: %s' % (shrinkEstimator,))
            covEstimator2 = cov.ShrunkCovariance(shrinkage=shrinkEstimator,
                                                 assume_centered=True)
        covEstimator2.fit(bed.val.T)
        XXT = covEstimator2.covariance_ * bed.val.shape[1]
        print('Done in %0.2f seconds' % (time.time() - t0))

    else:
        print('Computing kinship matrix...')
        t0 = time.time()
        XXT = symmetrize(blas.dsyrk(1.0, bed.val, lower=1))
        print('Done in %0.2f seconds' % (time.time() - t0))

        # A numeric shrinkMethod is the shrinkage coefficient itself;
        # -1 means "no shrinkage".
        try:
            shrinkParam = float(shrinkMethod)
        except (TypeError, ValueError):
            shrinkParam = -1

        if (shrinkMethod == 'mylw'):
            XXT_fit = XXT[np.ix_(fitIndividuals, fitIndividuals)]
            # off-diagonal sum of squares of the fitted kinship block
            sE2R = (np.sum(XXT_fit**2) - np.sum(np.diag(XXT_fit)**2)) / (bed.val.shape[1]**2)
            # equivalent to (bed.val**2).dot((bed.val.T)**2) on the fitted rows
            temp = symmetrize(
                blas.dsyrk(1.0, bed.val[fitIndividuals, :]**2, lower=1))
            sER2 = (temp.sum() - np.diag(temp).sum()) / bed.val.shape[1]
            shrinkParam = (sER2 - sE2R) / (sE2R * (bed.val.shape[1] - 1))

        if (shrinkParam > 0):
            print('shrinkage estimator: %s' % (1 - shrinkParam,))
            XXT = (1 - shrinkParam) * XXT + bed.val.shape[1] * shrinkParam * np.eye(XXT.shape[0])

    return XXT
def __init__(
    self,
    window=None,
    mu_estimator=None,
    cov_estimator=None,
    mu_window=None,
    cov_window=None,
    min_history=None,
    bounds=None,
    max_leverage=1.0,
    method="mpt",
    q=0.01,
    gamma=0.0,
    optimizer_options=None,
    force_weights=None,
    **kwargs,
):
    """
    :param window: Window for calculating mean and variance. Use None for entire history.
    :param mu_estimator: Either a name ("historical" or "sharpe") or an estimator object that
        is used as given. Defaults to SharpeEstimator().
    :param cov_estimator: Either a name ("empirical", "ledoit-wolf", "graph-lasso", "oas" or
        "single-index") or an estimator object. sklearn BaseEstimator instances are wrapped
        in CovarianceEstimator, anything else is used as given. Defaults to "empirical".
    :param mu_window: Window passed to HistoricalEstimator. Falls back to `window`.
    :param cov_window: Window passed to CovarianceEstimator. Falls back to `window`.
    :param min_history: Use zero weights for first min_periods. Default is 1 year
    :param bounds: Stored as `self.bounds` (empty dict when not given). Cannot be combined
        with `max_leverage` other than 1.
    :param max_leverage: Max leverage to use.
    :param method: optimization objective - can be "mpt", "sharpe" and "variance"
    :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
        from https://en.wikipedia.org/wiki/Modern_portfolio_theory#Efficient_frontier_with_no_risk-free_asset
        q=2 is equivalent to full-kelly, q=1 is equivalent to half kelly
    :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
    :param optimizer_options: Stored as `self.optimizer_options` (empty dict when not given).
    :param force_weights: Stored as `self.force_weights`.
    :raises NotImplementedError: if `bounds` is combined with `max_leverage != 1`, or if an
        estimator name is not one of the supported names.
    """
    super().__init__(min_history=min_history, **kwargs)
    mu_window = mu_window or window
    cov_window = cov_window or window
    self.method = method
    self.q = q
    self.gamma = gamma
    self.bounds = bounds or {}
    self.force_weights = force_weights
    self.max_leverage = max_leverage
    self.optimizer_options = optimizer_options or {}

    if bounds and max_leverage != 1:
        # NotImplemented is a non-callable constant, not an exception
        raise NotImplementedError(
            "max_leverage cannot be used with bounds, consider removing max_leverage and replace it with bounds1"
        )

    if cov_estimator is None:
        cov_estimator = "empirical"

    # resolve estimator names to covariance estimators
    if isinstance(cov_estimator, string_types):
        if cov_estimator == "empirical":
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == "ledoit-wolf":
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == "graph-lasso":
            # scikit-learn 0.20 renamed GraphLasso to GraphicalLasso and
            # removed the old name in 0.22; support both.
            graph_lasso = (
                getattr(covariance, "GraphicalLasso", None) or covariance.GraphLasso
            )
            cov_estimator = graph_lasso()
        elif cov_estimator == "oas":
            cov_estimator = covariance.OAS()
        elif cov_estimator == "single-index":
            cov_estimator = SingleIndexCovariance()
        else:
            raise NotImplementedError(
                "Unknown covariance estimator {}".format(cov_estimator)
            )

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

    if mu_estimator is None:
        mu_estimator = SharpeEstimator()

    if isinstance(mu_estimator, string_types):
        if mu_estimator == "historical":
            mu_estimator = HistoricalEstimator(window=mu_window)
        elif mu_estimator == "sharpe":
            mu_estimator = SharpeEstimator()
        else:
            raise NotImplementedError("Unknown mu estimator {}".format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
def covar_matrix(X, method="hist", d=0.94, **kwargs):
    r"""
    Calculate the covariance matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_features)
        Features matrix, where n_samples is the number of samples and
        n_features is the number of features.
    method : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
        The default is 'hist'. The method used to estimate the covariance matrix:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.

    d : scalar
        The smoothing factor of ewma methods. The ewm ``alpha`` is ``1 - d``.
        The default is 0.94.
    **kwargs:
        Other variables related to covariance estimation, passed to the
        scikit-learn estimator of the 'ledoit', 'oas' and 'shrunk' methods. See
        `Scikit Learn <https://scikit-learn.org/stable/modules/covariance.html>`_
        for more details.

    Returns
    -------
    cov : DataFrame
        The estimation of covariance matrix, with the columns of X as both
        index and columns.

    Raises
    ------
    ValueError
        When X is not a DataFrame or when method is not one of the supported
        names.

    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        cov = np.cov(X.T)
    elif method in ("ewma1", "ewma2"):
        # 'ewma1' uses adjust=True (the pandas default), 'ewma2' adjust=False
        cov = X.ewm(alpha=1 - d, adjust=(method == "ewma1")).cov()
        # ewm().cov() is indexed by (row label, asset); keep only the block
        # that belongs to the last row label.
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ledoit":
        lw = skcov.LedoitWolf(**kwargs)
        lw.fit(X)
        cov = lw.covariance_
    elif method == "oas":
        oas = skcov.OAS(**kwargs)
        oas.fit(X)
        cov = oas.covariance_
    elif method == "shrunk":
        sc = skcov.ShrunkCovariance(**kwargs)
        sc.fit(X)
        cov = sc.covariance_
    else:
        # Without this branch `cov` would be unbound below.
        raise ValueError("Unknown covariance method {!r}".format(method))

    # ndmin=2 keeps a single-asset result as a 1x1 matrix
    cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)

    return cov
def __init__(self, mu_estimator=None, cov_estimator=None, cov_window=None,
             min_history=None, bounds=None, max_leverage=1., method='mpt', q=0.01, gamma=0.,
             optimizer_options=None, force_weights=None, **kwargs):
    """
    :param mu_estimator: Either a name ('historical' or 'sharpe') or an estimator object that
        is used as given. Defaults to SharpeEstimator().
    :param cov_estimator: Either a name ('empirical', 'ledoit-wolf', 'graph-lasso', 'oas' or
        'single-index') or an estimator object. sklearn BaseEstimator instances are wrapped
        in CovarianceEstimator, anything else is used as given. Defaults to 'empirical'.
    :param cov_window: Window passed to CovarianceEstimator and to HistoricalEstimator.
    :param min_history: Use zero weights for first min_periods. Default is 1 year
    :param bounds: Stored as `self.bounds`.
    :param max_leverage: Max leverage to use.
    :param method: optimization objective - can be "mpt", "sharpe" and "variance"
    :param q: depends on method, e.g. for "mpt" it is risk aversion parameter (higher means lower aversion to risk)
    :param gamma: Penalize changing weights (can be number or Series with individual weights such as fees)
    :param optimizer_options: Stored as `self.optimizer_options` (empty dict when not given).
    :param force_weights: Stored as `self.force_weights`.
    :raises NotImplementedError: if an estimator name is not one of the supported names.
    """
    super().__init__(min_history=min_history, **kwargs)
    self.method = method
    self.q = q
    self.gamma = gamma
    self.bounds = bounds
    self.force_weights = force_weights
    self.max_leverage = max_leverage
    self.optimizer_options = optimizer_options or {}

    if cov_estimator is None:
        cov_estimator = 'empirical'

    # resolve estimator names to covariance estimators
    if isinstance(cov_estimator, string_types):
        if cov_estimator == 'empirical':
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == 'ledoit-wolf':
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == 'graph-lasso':
            # scikit-learn 0.20 renamed GraphLasso to GraphicalLasso and
            # removed the old name in 0.22; support both.
            graph_lasso = getattr(covariance, 'GraphicalLasso', None) or covariance.GraphLasso
            cov_estimator = graph_lasso()
        elif cov_estimator == 'oas':
            cov_estimator = covariance.OAS()
        elif cov_estimator == 'single-index':
            cov_estimator = SingleIndexCovariance()
        else:
            # NotImplemented is a non-callable constant, not an exception
            raise NotImplementedError(
                'Unknown covariance estimator {}'.format(cov_estimator))

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

    if mu_estimator is None:
        mu_estimator = SharpeEstimator()

    if isinstance(mu_estimator, string_types):
        if mu_estimator == 'historical':
            mu_estimator = HistoricalEstimator(window=cov_window)
        elif mu_estimator == 'sharpe':
            mu_estimator = SharpeEstimator()
        else:
            raise NotImplementedError(
                'Unknown mu estimator {}'.format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator