def sim_mont_portfolio(daily_returns, num_portfolios, risk_free):
    """Simulate random long-only portfolios and score each one.

    Parameters
    ----------
    daily_returns : DataFrame
        One column per asset, one row per observation. The 252 factor used
        below suggests daily data annualised by trading days -- confirm
        against the caller.
    num_portfolios : int
        Number of random weight vectors to draw.
    risk_free : scalar
        Subtracted from the scaled portfolio return before dividing by the
        scaled standard deviation.

    Returns
    -------
    DataFrame
        Columns 'Returns', 'SD', 'Sharpe', followed by one weight column per
        asset (named after ``daily_returns.columns``); one row per portfolio.
    """
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    import statsmodels.api as sm

    num_assets = len(daily_returns.T)

    # Robust (Huber) location estimate per asset; the accompanying scale
    # estimate is not needed here.
    # Alternative kept from the original: daily_returns.mean()
    huber = sm.robust.scale.Huber()
    returns_av, _ = huber(daily_returns)
    covariance = skcov.ShrunkCovariance().fit(daily_returns).covariance_

    # Uniform random weights, normalised so that every row sums to one.
    # Broadcasting replaces np.matlib.repmat, which needs an explicit
    # ``import numpy.matlib`` that this function never performed.
    weights = np.random.random((num_portfolios, num_assets))
    weights = weights / weights.sum(axis=1, keepdims=True)

    ret = 252 * weights.dot(returns_av)
    # Row-wise quadratic form w' C w for all portfolios at once.
    variance = (weights.dot(covariance) * weights).sum(axis=1)
    sd = np.sqrt(252 * variance)
    sharpe = (ret - risk_free) / sd

    return pd.DataFrame(
        data=np.column_stack((ret, sd, sharpe, weights)),
        columns=(['Returns', 'SD', 'Sharpe'] + list(daily_returns.columns)))
def optimal_portfolio(daily_returns, N, r):
    """Trace frontier portfolios by solving a sequence of quadratic programs.

    For each of ``N`` multipliers ``mu`` the solver is called with ``mu * S``
    as the quadratic term and ``-pbar`` as the linear term, under the
    constraints ``x >= 0`` and ``sum(x) == 1``.

    Parameters
    ----------
    daily_returns : DataFrame
        One column per asset, one row per observation.
    N : int
        Number of multipliers (and therefore portfolios) to compute.
    r : scalar
        Subtracted from the scaled return when computing the 'Sharpe' column.

    Returns
    -------
    DataFrame
        Columns 'Returns', 'SD', 'Sharpe', followed by one weight column per
        asset; one row per multiplier.
    """
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    import cvxopt as opt
    from cvxopt import blas, solvers
    import statsmodels.api as sm

    huber = sm.robust.scale.Huber()
    n = len(daily_returns.T)
    # Plain ndarray instead of np.matrix: current scikit-learn input
    # validation rejects np.matrix, and nothing below needs matrix semantics.
    observations = np.asarray(daily_returns)
    # Multipliers start at 0 (t = 0) and grow geometrically with t.
    mus = [(10**(5.0 * t / N - 1.0) - 10**(-1)) for t in range(N)]

    #cvxopt matrices
    S = opt.matrix(skcov.ShrunkCovariance().fit(observations).covariance_)
    returns_av, _ = huber(observations)  # robust location; scale unused
    pbar = opt.matrix(returns_av)

    # Constraint matrices
    G = -opt.matrix(np.eye(n))  # negative n x n identity matrix
    h = opt.matrix(0.0, (n, 1))
    A = opt.matrix(1.0, (1, n))
    b = opt.matrix(1.0)

    # Calculate efficient frontier weights using quadratic programming
    solutions = [solvers.qp(mu * S, -pbar, G, h, A, b)['x'] for mu in mus]

    # Risk and returns (both scaled by 252)
    port_returns = np.asarray([252 * blas.dot(pbar, x) for x in solutions])
    risks = np.asarray(
        [np.sqrt(252 * blas.dot(x, S * x)) for x in solutions])
    sharpe = np.divide((port_returns - r), risks)

    # Convert each n x 1 cvxopt vector to a flat ndarray directly; the
    # explicit reshape keeps the result 2-D even when N == 0.
    portfolios = np.asarray(
        [np.array(x)[:, 0] for x in solutions]).reshape(len(solutions), n)

    return pd.DataFrame(
        data=np.column_stack((port_returns, risks, sharpe, portfolios)),
        columns=(['Returns', 'SD', 'Sharpe'] + list(daily_returns.columns)))
def sim_mont_portfolio(daily_ret, num_portfolios, risk_free):
    """Draw random normalised portfolios and report return, risk and Sharpe.

    Parameters
    ----------
    daily_ret : DataFrame
        One column per asset, one row per observation.
    num_portfolios : int
        How many random weight vectors to generate.
    risk_free : scalar
        Subtracted from the scaled portfolio return in the Sharpe ratio.

    Returns
    -------
    DataFrame
        Columns 'Rendimiento', 'SD', 'Sharpe', then one weight column per
        asset; one row per simulated portfolio.
    """
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np

    n_assets = daily_ret.columns.size

    # Sample mean per asset and shrunk covariance estimate.
    mean_ret = daily_ret.mean()
    shrunk_cov = skcov.ShrunkCovariance().fit(daily_ret).covariance_

    # Uniform draws, each row rescaled to sum to one.
    draws = np.random.random((num_portfolios, n_assets))
    weights = draws / draws.sum(axis=1)[:, None]

    # Both figures are scaled by 252.
    portfolio_ret = weights.dot(mean_ret) * 252
    portfolio_std_dev = np.array(
        [np.sqrt(252 * row.dot(shrunk_cov).dot(row)) for row in weights],
        dtype=float)
    sharpe = (portfolio_ret - risk_free) / portfolio_std_dev

    table = np.column_stack((portfolio_ret, portfolio_std_dev, sharpe, weights))
    labels = ['Rendimiento', 'SD', 'Sharpe'] + list(daily_ret.columns)
    return pd.DataFrame(table, columns=labels)


####### Efficient frontier points via quadratic programming
# NOTE: disabled draft kept for reference. As written it refers to names it
# never defines (N, n, pbar, r, daily_returns).
#def optimal_portfolio(daily_ret, n_opt, risk_free):
#    # Frontier points
#    #Packages
#    import pandas as pd
#    import sklearn.covariance as skcov
#    import numpy as np
#    import cvxopt as opt
#    from cvxopt import blas, solvers
#    num_stocks = daily_ret.columns.size
#    mus = [(10**(5.0 * t/N- 1.0)-10**(-1)) for t in range(N)]
#    #cvxopt matrices
#    S = opt.matrix(skcov.ShrunkCovariance().fit(daily_ret).covariance_)
#    daily_ret_mean = daily_ret.mean().values
#    # Constraint matrices
#    G = -opt.matrix(np.eye(n)) # negative n x n identity matrix
#    h = opt.matrix(0.0, (n ,1))
#    A = opt.matrix(np.array(np.ones(num_stocks),daily_ret_mean), (2, num_stocks))
#    b = opt.matrix(np.array(1.0))
#    # Calculate efficient frontier weights using quadratic programming
#    portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] for mu in mus]
#    # Risk and returns
#    returns = [252*blas.dot(pbar, x) for x in portfolios]
#    risks = [np.sqrt(252*blas.dot(x, S*x)) for x in portfolios]
#    portfolios=[np.eye(n).dot(portfolios[i])[:,0] for i in range(N)]
#    returns = np.asarray(returns)
#    risks = np.asarray(risks)
#    sharpe=np.divide((returns-r),risks)
#    portfolios = np.asarray(portfolios)
#    return pd.DataFrame(data=np.column_stack((returns,risks,sharpe,portfolios)),columns=(['Returns','SD','Sharpe']+list(daily_returns.columns)))
def gen_Kinst(nl, lims=(-.01, .03), nsamp=1000, rms=.01):
    """Fit a shrunk covariance estimator to synthetic samples.

    Each of the ``nsamp`` samples is one standard-normal draw multiplied by
    ``nl`` factors evenly spaced over ``lims``, plus independent Gaussian
    noise scaled by ``rms``.

    Parameters
    ----------
    nl : int
        Number of features (length of the evenly spaced factor vector).
    lims : sequence
        Unpacked as the leading arguments of ``np.linspace``.
    nsamp : int
        Number of synthetic samples.
    rms : scalar
        Multiplier applied to the standard-normal noise.

    Returns
    -------
    The fitted ``ShrunkCovariance`` estimator (shrinkage 0.05, precision
    stored).
    """
    import sklearn.covariance as sklcov

    # Draw order matters for reproducibility under a fixed seed:
    # first the per-sample amplitudes, then the noise matrix.
    amplitudes = np.random.randn(nsamp)
    factors = np.linspace(*lims, nl)
    noise = rms * np.random.randn(nsamp, nl)

    # (nsamp, nl) design: row i is amplitudes[i] * factors, plus noise.
    observations = np.outer(amplitudes, factors) + noise

    estimator = sklcov.ShrunkCovariance(shrinkage=.05, store_precision=True)
    estimator.fit(observations)
    return estimator
def optimal_portfolio_b(daily_ret, n_opt, risk_free, c0):
    """Minimum-variance portfolios over the assets plus a constant 'BOND'.

    A synthetic 'BOND' column with constant value ``c0`` is appended to the
    returns, and a zero row/column is appended to the shrunk covariance. For
    each of ``n_opt`` target mean returns (evenly spaced between the smallest
    and largest column mean) a quadratic program minimises the variance
    subject to: mean return >= target, weights >= 0, weights sum to one.

    Parameters
    ----------
    daily_ret : DataFrame
        One column per asset, one row per observation.
    n_opt : int
        Number of target returns / portfolios.
    risk_free : scalar
        Subtracted from the scaled return in the 'Sharpe' column.
    c0 : scalar
        Constant value used for every row of the 'BOND' column.

    Returns
    -------
    DataFrame
        Columns 'Rendimiento', 'SD', 'Sharpe', then one weight column per
        asset and a final 'BOND' weight column.
    """
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    import cvxopt as opt
    from cvxopt import solvers

    # Bond inclusion: pad the covariance with a zero row and zero column.
    n_equities = daily_ret.columns.size
    equity_cov = skcov.ShrunkCovariance().fit(daily_ret).covariance_
    padded_rows = np.insert(equity_cov, n_equities, 0, axis=0)
    robust_cov_matrix = np.insert(padded_rows, n_equities, 0, axis=1)

    bond_column = c0 * np.ones(daily_ret.index.size)
    daily_ret_b = pd.DataFrame(
        np.column_stack((np.asarray(daily_ret), bond_column)),
        columns=list(daily_ret.columns) + ['BOND'],
        index=daily_ret.index)

    n_assets = daily_ret_b.columns.size
    mean_ret = daily_ret_b.mean()
    targets = np.linspace(mean_ret.min(), mean_ret.max(), n_opt)

    #cvxopt matrices
    S = opt.matrix(robust_cov_matrix)
    # First row enforces the return floor, remaining rows enforce x >= 0.
    G = -opt.matrix(np.vstack((np.asarray(mean_ret), np.eye(n_assets))))
    p = opt.matrix(np.zeros((n_assets, 1)))
    A = opt.matrix(np.ones((1, n_assets)))
    b = opt.matrix(np.array([1.0]))

    # Calculate efficient frontier weights using quadratic programming
    portfolios = np.zeros((n_opt, n_assets))
    for k, target in enumerate(targets):
        bounds = np.zeros((n_assets + 1, 1))
        bounds[0, 0] = target
        h = -opt.matrix(bounds)
        solution = solvers.qp(S, p, G, h, A, b)
        portfolios[k, :] = np.asarray(solution['x'])[:, 0]

    # Risk and returns (both scaled by 252)
    returns = 252 * portfolios.dot(mean_ret)
    risks = np.array(
        [np.sqrt(252 * w.dot(robust_cov_matrix).dot(w)) for w in portfolios],
        dtype=float)
    sharpe = (returns - risk_free) / risks

    return pd.DataFrame(
        data=np.column_stack((returns, risks, sharpe, portfolios)),
        columns=(['Rendimiento', 'SD', 'Sharpe'] + list(daily_ret_b.columns)))
def __init__(self, dim, estimator='OAS', **kwargs):
    """Initialise the base class and select a covariance estimator.

    ``dim`` and ``**kwargs`` are forwarded unchanged to the parent
    constructor. ``estimator`` names a class on the ``covariance`` module
    (presumably ``sklearn.covariance`` -- confirm against the file's
    imports); it is instantiated with ``assume_centered=True``.

    Raises ValueError when ``estimator`` is not one of the supported names.
    """
    super(SKGaussianParams, self).__init__(dim, **kwargs)

    supported = ('EmpiricalCovariance', 'LedoitWolf', 'MinCovDet', 'OAS',
                 'ShrunkCovariance')
    if estimator not in supported:
        raise ValueError('Unknown estimator: {}'.format(estimator))

    # The accepted names are exactly the class names on the module.
    estimator_cls = getattr(covariance, estimator)
    self._estimator = estimator_cls(assume_centered=True)
def fit(self, covObj=covariance.ShrunkCovariance(shrinkage=0.2)):
    """
    Fit one covariance estimator per label.

    Any covariance estimation object can be supplied, for example:
        empirical (unadvised)
        shrunken
            Basic
            Ledoit-Wolf
            Oracle
        sparse (via graphical lasso)
        robust (via minimum covariance determinant)

    Parameters:
    - - - - -
        covObj : covariance estimation object
            Chooses the type of covariance estimation to apply. It is
            deep-copied for every label, so the object passed in (including
            the shared default) is never fitted itself.

    Stores the fitted estimators in ``self._cov`` and their precision
    matrices in ``self._precision`` (both keyed by label), and sets
    ``self._fitted`` to True.
    """
    estimators = dict.fromkeys(self._labels)
    precisions = dict.fromkeys(self._labels)

    for label in self._labels:
        model = deepcopy(covObj)
        model.fit(self._labelData[label])
        estimators[label] = model
        precisions[label] = model.get_precision()

    self._cov = estimators
    self._precision = precisions
    self._fitted = True
def sim_mont_portfolio(daily_ret, num_portfolios, risk_free):
    """Simulate randomly weighted portfolios and tabulate their statistics.

    Parameters
    ----------
    daily_ret : DataFrame
        One column per asset, one row per observation.
    num_portfolios : int
        Number of random weight vectors to draw.
    risk_free : scalar
        Subtracted from the scaled portfolio return in the Sharpe ratio.

    Returns
    -------
    DataFrame
        Columns 'Rendimiento', 'SD', 'Sharpe', then one weight column per
        asset; one row per simulated portfolio.
    """
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np

    asset_count = daily_ret.columns.size

    # Mean returns and shrunk covariance matrix
    avg_ret = daily_ret.mean()
    cov_estimate = skcov.ShrunkCovariance().fit(daily_ret).covariance_

    # Simulated weights: uniform draws, each row normalised to sum to one.
    unscaled = np.random.random((num_portfolios, asset_count))
    row_totals = unscaled.sum(axis=1)
    weights = unscaled / row_totals[:, None]

    def _scaled_std(w):
        # Standard deviation of one portfolio, scaled by 252.
        return np.sqrt(252 * w.dot(cov_estimate).dot(w))

    portfolio_ret = weights.dot(avg_ret) * 252
    portfolio_std_dev = np.array([_scaled_std(w) for w in weights],
                                 dtype=float)
    sharpe = (portfolio_ret - risk_free) / portfolio_std_dev

    header = ['Rendimiento', 'SD', 'Sharpe'] + list(daily_ret.columns)
    return pd.DataFrame(
        np.column_stack((portfolio_ret, portfolio_std_dev, sharpe, weights)),
        columns=header)
def computeCovar(bed, shrinkMethod, fitIndividuals): eigen = dict([]) if (shrinkMethod in ['lw', 'oas', 'l1', 'cv']): import sklearn.covariance as cov t0 = time.time() print 'Estimating shrunk covariance using', shrinkMethod, 'estimator...' if (shrinkMethod == 'lw'): covEstimator = cov.LedoitWolf(assume_centered=True, block_size=5 * bed.val.shape[0]) elif (shrinkMethod == 'oas'): covEstimator = cov.OAS(assume_centered=True) elif (shrinkMethod == 'l1'): covEstimator = cov.GraphLassoCV(assume_centered=True, verbose=True) elif (shrinkMethod == 'cv'): shrunkEstimator = cov.ShrunkCovariance(assume_centered=True) param_grid = {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]} covEstimator = sklearn.grid_search.GridSearchCV( shrunkEstimator, param_grid) else: raise Exception('unknown covariance regularizer') covEstimator.fit(bed.val[fitIndividuals, :].T) if (shrinkMethod == 'l1'): alpha = covEstimator.alpha_ print 'l1 alpha chosen:', alpha covEstimator2 = cov.GraphLasso(alpha=alpha, assume_centered=True, verbose=True) else: if (shrinkMethod == 'cv'): shrinkEstimator = clf.best_params_['shrinkage'] else: shrinkEstimator = covEstimator.shrinkage_ print 'shrinkage estimator:', shrinkEstimator covEstimator2 = cov.ShrunkCovariance(shrinkage=shrinkEstimator, assume_centered=True) covEstimator2.fit(bed.val.T) XXT = covEstimator2.covariance_ * bed.val.shape[1] print 'Done in %0.2f' % (time.time() - t0), 'seconds' else: print 'Computing kinship matrix...' 
t0 = time.time() XXT = symmetrize(blas.dsyrk(1.0, bed.val, lower=1)) print 'Done in %0.2f' % (time.time() - t0), 'seconds' try: shrinkParam = float(shrinkMethod) except: shrinkParam = -1 if (shrinkMethod == 'mylw'): XXT_fit = XXT[np.ix_(fitIndividuals, fitIndividuals)] sE2R = (np.sum(XXT_fit**2) - np.sum(np.diag(XXT_fit)**2)) / (bed.val.shape[1]**2) #temp = (bed.val**2).dot((bed.val.T)**2) temp = symmetrize( blas.dsyrk(1.0, bed.val[fitIndividuals, :]**2, lower=1)) sER2 = (temp.sum() - np.diag(temp).sum()) / bed.val.shape[1] shrinkParam = (sER2 - sE2R) / (sE2R * (bed.val.shape[1] - 1)) if (shrinkParam > 0): print 'shrinkage estimator:', 1 - shrinkParam XXT = (1 - shrinkParam) * XXT + bed.val.shape[ 1] * shrinkParam * np.eye(XXT.shape[0]) return XXT
def covar_matrix(X, method="hist", d=0.94, **kwargs):
    r"""
    Calculate the covariance matrix using the selected method.

    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_features)
        Features matrix, where n_samples is the number of samples and
        n_features is the number of features.
    method : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
        The default is 'hist'. The method used to estimate the covariance matrix:

        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximation Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
    d : scalar
        The smoothing factor of ewma methods (passed to pandas as
        ``alpha = 1 - d``). The default is 0.94.
    **kwargs:
        Other variables related to covariance estimation, forwarded to the
        scikit-learn estimator for 'ledoit', 'oas' and 'shrunk'. See
        `Scikit Learn <https://scikit-learn.org/stable/modules/covariance.html>`_
        for more details.

    Returns
    -------
    cov : DataFrame
        The estimation of covariance matrix, indexed and labelled by the
        columns of X.

    Raises
    ------
    ValueError
        When X is not a DataFrame or ``method`` is not one of the supported
        names.
    """
    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        cov = np.cov(X.T)
    elif method in ("ewma1", "ewma2"):
        # 'ewma1' relies on the pandas default adjust=True.
        if method == "ewma1":
            cov = X.ewm(alpha=1 - d).cov()
        else:
            cov = X.ewm(alpha=1 - d, adjust=False).cov()
        # The result is stacked per row label of X; keep only the block
        # belonging to the last one.
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ledoit":
        lw = skcov.LedoitWolf(**kwargs)
        lw.fit(X)
        cov = lw.covariance_
    elif method == "oas":
        oas = skcov.OAS(**kwargs)
        oas.fit(X)
        cov = oas.covariance_
    elif method == "shrunk":
        sc = skcov.ShrunkCovariance(**kwargs)
        sc.fit(X)
        cov = sc.covariance_
    else:
        # Previously an unknown method fell through and failed later with an
        # unbound-variable error instead of the documented ValueError.
        raise ValueError(
            "method must be one of 'hist', 'ewma1', 'ewma2', 'ledoit', "
            "'oas' or 'shrunk'")

    cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)

    return cov
def computeIndependentExCls(descr, neg_desc, the_cls, outputfolder=None, suffix='_ecls.pkl.gz', parallel=True, nprocs=None, resampling=0, files=None, load=False, return_none=False, n_cls=-1): """ compute for each descr an exemplar classifier using the descr. of <neg_desc> as negatives, optionally save the classifiers """ print '=> compute independent e-cls' if files is not None: assert (len(files) == len(descr)) print outputfolder, len(files) if files else '', suffix, load if isinstance(the_cls, LDA): fname = os.path.join(outputfolder, 'covinv.pkl.gz') if load and os.path.exists(fname): cov_inv = pc.load(fname) else: # cc = covariance.GraphLassoCV() cc = covariance.ShrunkCovariance() # cc = covariance.LeoditWolf() # cc = covariance.OAS() # cc = covariance.MinCovDet() cc.fit(neg_desc) cov_inv = cc.precision_ # covar = np.cov(neg_desc.T, bias=1) # # regularize # covar[np.diag_indices(len(covar))] += 0.01 # cov_inv = np.linalg.inv(covar) pc.dump(fname, cov_inv, verbose=False) print '| elda: cov_inv.shape:', cov_inv.shape mean = np.mean(neg_desc, axis=0) zero_mean = descr - mean if n_cls is not None and n_cls > 0: indices = np.random.choice(len(neg_desc), min(len(neg_desc), n_cls), replace=False) neg_desc = neg_desc[indices] print 'choose to use {} neg-descr'.format(len(neg_desc)) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descr)) def createEx(i): # print 'all.shape:', descr.shape, 'one:', descr[i].shape fname = '' if outputfolder is not None and files is not None: if files[i].endswith('.pkl.gz'): fname = files[i].replace('.pkl.gz', suffix) else: fname = os.path.splitext(files[i])[0] + suffix fname = os.path.join(outputfolder, os.path.basename(fname)) if load and fname != '' and os.path.exists(fname): run = False try: cls = pc.load(fname) assert (cls.__class__.__name__ == the_cls.__class__.__name__) progress.update(i + 1) if return_none: return None return cls 
except: # e.g. EOFError most of the time print 'Warning: couldnt load {} -> recompute'.format(fname) # print 'compute cls for', os.path.basename(files[i]) if isinstance(the_cls, LDA): cls = copy.deepcopy(the_cls) w = cov_inv.dot(zero_mean[i].T) cls.coef_ = w.reshape(1, -1) cls.intercept_ = 0 #np.zeros( (cls.coef_.shape[0],1) ) else: cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1), neg_desc, the_cls, resampling) if fname != '': pc.dump(fname, cls, verbose=False) progress.update(i + 1) if return_none: return None return cls progress.start() if parallel: ex_cls = pc.parmap(createEx, range(len(descr)), nprocs=nprocs) else: ex_cls = map(createEx, range(len(descr))) progress.finish() print '[Done]' return ex_cls