Example #1
def sim_mont_portfolio(daily_returns, num_portfolios, risk_free):
    num_assets = len(daily_returns.T)
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    import statsmodels.api as sm
    huber = sm.robust.scale.Huber()
    # Robust mean and scale of daily returns (Huber M-estimator)
    returns_av, scale = huber(daily_returns)
    #returns_av = daily_returns.mean()
    covariance = skcov.ShrunkCovariance().fit(daily_returns).covariance_
    #Simulated weights
    weights = np.random.random((num_portfolios, num_assets))
    # Normalize each row so every portfolio's weights sum to 1
    # (the original used np.matlib.repmat, which needs `import numpy.matlib`)
    weights /= weights.sum(axis=1)[:, None]
    ret = 252 * weights.dot(returns_av).T
    sd = np.zeros(num_portfolios)
    for i in range(num_portfolios):
        sd[i] = np.sqrt(
            252 * (((weights[i, :]).dot(covariance)).dot(weights[i, :].T)))
    sharpe = np.divide((ret - risk_free), sd)
    return pd.DataFrame(data=np.column_stack((ret, sd, sharpe, weights)),
                        columns=(['Returns', 'SD', 'Sharpe'] +
                                 list(daily_returns.columns)))
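A minimal usage sketch for the function above (not part of the original example), assuming daily_returns is a pandas DataFrame of daily returns with one column per asset; the data here is synthetic:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
daily_returns = pd.DataFrame(rng.normal(0.0005, 0.01, size=(500, 3)),
                             columns=['AAA', 'BBB', 'CCC'])
portfolios = sim_mont_portfolio(daily_returns, num_portfolios=5000,
                                risk_free=0.02)
best = portfolios.loc[portfolios['Sharpe'].idxmax()]  # highest-Sharpe portfolio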
Example #2
def optimal_portfolio(daily_returns, N, r):
    # Frontier points
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    import cvxopt as opt
    from cvxopt import blas, solvers
    import statsmodels.api as sm
    huber = sm.robust.scale.Huber()
    n = len(daily_returns.T)
    returns = np.asarray(daily_returns)  # np.asmatrix is deprecated; a 2-D array behaves the same here
    mus = [(10**(5.0 * t / N - 1.0) - 10**(-1)) for t in range(N)]
    #cvxopt matrices
    S = opt.matrix(skcov.ShrunkCovariance().fit(returns).covariance_)
    returns_av, scale = huber(returns)
    pbar = opt.matrix(returns_av)
    # Constraint matrices
    G = -opt.matrix(np.eye(n))  # negative n x n identity matrix
    h = opt.matrix(0.0, (n, 1))
    A = opt.matrix(1.0, (1, n))
    b = opt.matrix(1.0)
    # Calculate efficient frontier weights using quadratic programming
    portfolios = [solvers.qp(mu * S, -pbar, G, h, A, b)['x'] for mu in mus]
    # Risk and returns
    returns = [252 * blas.dot(pbar, x) for x in portfolios]
    risks = [np.sqrt(252 * blas.dot(x, S * x)) for x in portfolios]
    portfolios = [np.asarray(x).ravel() for x in portfolios]  # cvxopt matrix -> flat array
    returns = np.asarray(returns)
    risks = np.asarray(risks)
    sharpe = np.divide((returns - r), risks)
    portfolios = np.asarray(portfolios)
    return pd.DataFrame(
        data=np.column_stack((returns, risks, sharpe, portfolios)),
        columns=(['Returns', 'SD', 'Sharpe'] + list(daily_returns.columns)))
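A usage sketch, reusing the synthetic daily_returns from the sketch above (cvxopt's QP solver prints an iteration log unless silenced):

from cvxopt import solvers
solvers.options['show_progress'] = False  # silence the QP iteration log
frontier = optimal_portfolio(daily_returns, N=50, r=0.02)
best_weights = frontier.loc[frontier['Sharpe'].idxmax(), daily_returns.columns]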
def sim_mont_portfolio(daily_ret, num_portfolios, risk_free):
    num_stocks = daily_ret.columns.size
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    # Mean returns
    daily_ret_mean = daily_ret.mean()
    # Covariance matrix
    robust_cov_matrix = skcov.ShrunkCovariance().fit(daily_ret).covariance_
    #Simulated weights
    weights = np.random.random((num_portfolios, num_stocks))
    weights /= np.sum(weights, axis=1)[:, None]
    portfolio_ret = weights.dot(daily_ret_mean) * 252
    portfolio_std_dev = np.zeros(num_portfolios)
    for i in range(num_portfolios):
        portfolio_std_dev[i] = np.sqrt(
            252 *
            (((weights[i, :]).dot(robust_cov_matrix)).dot(weights[i, :].T)))
    sharpe = (portfolio_ret - risk_free) / portfolio_std_dev
    return pd.DataFrame(
        np.column_stack((portfolio_ret, portfolio_std_dev, sharpe, weights)),
        columns=(['Returns', 'SD', 'Sharpe'] + list(daily_ret.columns)))


####### Efficient frontier points via quadratic programming
#def optimal_portfolio(daily_ret, n_opt, risk_free):
#    # Frontier points
#    #Packages
#    import pandas as pd
#    import sklearn.covariance as skcov
#    import numpy as np
#    import cvxopt as opt
#    from cvxopt import blas, solvers
#    num_stocks = daily_ret.columns.size
#    mus = [(10**(5.0 * t/N- 1.0)-10**(-1)) for t in range(N)]
#    #cvxopt matrices
#    S = opt.matrix(skcov.ShrunkCovariance().fit(daily_ret).covariance_)
#    daily_ret_mean = daily_ret.mean().values
#    # Constraint matrices
#    G = -opt.matrix(np.eye(n))   # negative n x n identity matrix
#    h = opt.matrix(0.0, (n ,1))
#    A = opt.matrix(np.array(np.ones(num_stocks),daily_ret_mean), (2, num_stocks))
#    b = opt.matrix(np.array(1.0))
#    # Calculate efficient frontier weights using quadratic programming
#    portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] for mu in mus]
#    # Risk and returns
#    returns = [252*blas.dot(pbar, x) for x in portfolios]
#    risks = [np.sqrt(252*blas.dot(x, S*x)) for x in portfolios]
#    portfolios=[np.eye(n).dot(portfolios[i])[:,0] for i in range(N)]
#    returns = np.asarray(returns)
#    risks = np.asarray(risks)
#    sharpe=np.divide((returns-r),risks)
#    portfolios = np.asarray(portfolios)
#    return  pd.DataFrame(data=np.column_stack((returns,risks,sharpe,portfolios)),columns=(['Returns','SD','Sharpe']+list(daily_returns.columns)))
Example #4
def gen_Kinst(nl, lims=(-.01, .03), nsamp=1000, rms=.01):
    import numpy as np
    import sklearn.covariance as sklcov
    samples = np.random.randn(nsamp)[None, :] * np.linspace(*lims, nl)[:, None]
    samples_noise = rms * np.random.randn(nsamp, nl)

    cov_reg = sklcov.ShrunkCovariance(shrinkage=.05, store_precision=True)
    cov_reg.fit(samples.T + samples_noise)

    #cov = np.cov(samples + samples_noise.T)

    return cov_reg
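A quick look at what the function returns (a fitted scikit-learn estimator, so the usual attributes apply):

cov_reg = gen_Kinst(nl=20)
K = cov_reg.covariance_     # (20, 20) shrunk covariance estimate
K_inv = cov_reg.precision_  # available because store_precision=True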
def optimal_portfolio_b(daily_ret, n_opt, risk_free, c0):
    # Frontier points
    #Packages
    import pandas as pd
    import sklearn.covariance as skcov
    import numpy as np
    import cvxopt as opt
    from cvxopt import blas, solvers
    # Bond inclusion
    robust_cov_matrix = np.insert(
        (np.insert(skcov.ShrunkCovariance().fit(daily_ret).covariance_,
                   daily_ret.columns.size,
                   0,
                   axis=0)),
        daily_ret.columns.size,
        0,
        axis=1)
    daily_ret_b = pd.DataFrame(np.column_stack(
        (np.asarray(daily_ret), c0 * np.ones(daily_ret.index.size))),
                               columns=list(daily_ret.columns) + ['BOND'],
                               index=daily_ret.index)
    num_stocks = daily_ret_b.columns.size
    daily_ret_mean = daily_ret_b.mean()
    mus = np.linspace(daily_ret_mean.min(), daily_ret_mean.max(), n_opt)
    #cvxopt matrices
    S = opt.matrix(robust_cov_matrix)
    G = -opt.matrix(
        np.concatenate(
            (np.array([daily_ret_mean]), np.eye(num_stocks)), axis=0))
    p = opt.matrix(np.zeros((num_stocks, 1)))
    A = opt.matrix(np.ones((1, num_stocks)))
    b = opt.matrix(np.array([1.0]))
    # Calculate efficient frontier weights using quadratic programming
    portfolios = np.zeros((n_opt, num_stocks))
    for k in range(n_opt):
        h = -opt.matrix(
            np.concatenate(
                (np.array([[mus[k]]]), np.zeros((num_stocks, 1))), axis=0))
        portfolios[k, :] = np.asarray(solvers.qp(S, p, G, h, A, b)['x']).T[0]
    # Risk and returns
    returns = 252 * portfolios.dot(daily_ret_mean)
    risks = np.zeros(n_opt)
    for i in range(n_opt):
        risks[i] = np.sqrt(
            252 *
            portfolios[i, :].dot(robust_cov_matrix).dot(portfolios[i, :].T))
    sharpe = (returns - risk_free) / risks
    return pd.DataFrame(
        data=np.column_stack((returns, risks, sharpe, portfolios)),
        columns=(['Returns', 'SD', 'Sharpe'] + list(daily_ret_b.columns)))
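A usage sketch with hypothetical inputs; c0 is the constant daily bond return that the function appends as the 'BOND' column (the zero bond variance makes the QP degenerate in principle, but it typically solves):

frontier_b = optimal_portfolio_b(daily_returns, n_opt=50, risk_free=0.02,
                                 c0=0.02 / 252)
bond_weight = frontier_b['BOND']  # allocation to the bond leg along the frontier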
Example #6
 def __init__(self, dim, estimator='OAS', **kwargs):
     """
     TODO
     """
     super(SKGaussianParams, self).__init__(dim, **kwargs)
     if estimator == 'EmpiricalCovariance':
         self._estimator = covariance.EmpiricalCovariance(
             assume_centered=True)
     elif estimator == 'LedoitWolf':
         self._estimator = covariance.LedoitWolf(assume_centered=True)
     elif estimator == 'MinCovDet':
         self._estimator = covariance.MinCovDet(assume_centered=True)
     elif estimator == 'OAS':
         self._estimator = covariance.OAS(assume_centered=True)
     elif estimator == 'ShrunkCovariance':
         self._estimator = covariance.ShrunkCovariance(assume_centered=True)
     else:
         raise ValueError('Unknown estimator: {}'.format(estimator))
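The if/elif chain above could also be written as a lookup table; a sketch of that alternative (not from the original class):

from sklearn import covariance

_ESTIMATORS = {
    'EmpiricalCovariance': covariance.EmpiricalCovariance,
    'LedoitWolf': covariance.LedoitWolf,
    'MinCovDet': covariance.MinCovDet,
    'OAS': covariance.OAS,
    'ShrunkCovariance': covariance.ShrunkCovariance,
}

def make_estimator(name):
    # Build the requested covariance estimator, assuming centered data
    try:
        return _ESTIMATORS[name](assume_centered=True)
    except KeyError:
        raise ValueError('Unknown estimator: {}'.format(name))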
Example #7
    def fit(self, covObj=covariance.ShrunkCovariance(shrinkage=0.2)):
        """
        
        Generates covariance matrices for each label.  We can use a variety of
        covariance estimates here:
            
            empirical (unadvised)
            shrunken
                Basic
                Ledoit-Wolf
                Oracle
            sparse (via graphical lasso)
            robust (via minimum covariance determinant)
            
        Parameters:
        - - - - - 
            
            covObj : covariance estimation object
                    User specifies what type of covariance estimation they
                    want to incorporate by providing an object

        """

        cov = dict.fromkeys(self._labels)

        precision = dict.fromkeys(self._labels)

        for l in self._labels:

            cov[l] = deepcopy(covObj)
            cov[l].fit(self._labelData[l])
            precision[l] = cov[l].get_precision()

        self._cov = cov
        self._precision = precision
        self._fitted = True
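As the docstring notes, any scikit-learn covariance object can be passed in; for example (model here stands for an instance of the surrounding class, which is not shown):

from sklearn import covariance

model.fit(covObj=covariance.LedoitWolf())   # Ledoit-Wolf shrinkage
model.fit(covObj=covariance.OAS())          # Oracle Approximating Shrinkage
model.fit(covObj=covariance.MinCovDet())    # robust (minimum covariance determinant)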
Example #9
def computeCovar(bed, shrinkMethod, fitIndividuals):
    eigen = dict([])

    if (shrinkMethod in ['lw', 'oas', 'l1', 'cv']):
        import sklearn.covariance as cov
        t0 = time.time()
        print('Estimating shrunk covariance using', shrinkMethod, 'estimator...')

        if (shrinkMethod == 'lw'):
            covEstimator = cov.LedoitWolf(assume_centered=True,
                                          block_size=5 * bed.val.shape[0])
        elif (shrinkMethod == 'oas'):
            covEstimator = cov.OAS(assume_centered=True)
        elif (shrinkMethod == 'l1'):
            # GraphLassoCV was renamed GraphicalLassoCV in scikit-learn 0.22
            covEstimator = cov.GraphicalLassoCV(assume_centered=True,
                                                verbose=True)
        elif (shrinkMethod == 'cv'):
            # GridSearchCV lives in sklearn.model_selection in modern scikit-learn
            from sklearn.model_selection import GridSearchCV
            shrunkEstimator = cov.ShrunkCovariance(assume_centered=True)
            param_grid = {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]}
            covEstimator = GridSearchCV(shrunkEstimator, param_grid)
        else:
            raise Exception('unknown covariance regularizer')

        covEstimator.fit(bed.val[fitIndividuals, :].T)
        if (shrinkMethod == 'l1'):
            alpha = covEstimator.alpha_
            print('l1 alpha chosen:', alpha)
            covEstimator2 = cov.GraphicalLasso(alpha=alpha,
                                               assume_centered=True,
                                               verbose=True)
        else:
            if (shrinkMethod == 'cv'):
                shrinkEstimator = covEstimator.best_params_['shrinkage']
            else:
                shrinkEstimator = covEstimator.shrinkage_
            print('shrinkage estimator:', shrinkEstimator)
            covEstimator2 = cov.ShrunkCovariance(shrinkage=shrinkEstimator,
                                                 assume_centered=True)
        covEstimator2.fit(bed.val.T)
        XXT = covEstimator2.covariance_ * bed.val.shape[1]
        print('Done in %0.2f' % (time.time() - t0), 'seconds')

    else:
        print('Computing kinship matrix...')
        t0 = time.time()
        XXT = symmetrize(blas.dsyrk(1.0, bed.val, lower=1))
        print('Done in %0.2f' % (time.time() - t0), 'seconds')
        try:
            shrinkParam = float(shrinkMethod)
        except ValueError:
            shrinkParam = -1
        if (shrinkMethod == 'mylw'):
            XXT_fit = XXT[np.ix_(fitIndividuals, fitIndividuals)]
            sE2R = (np.sum(XXT_fit**2) -
                    np.sum(np.diag(XXT_fit)**2)) / (bed.val.shape[1]**2)
            #temp = (bed.val**2).dot((bed.val.T)**2)
            temp = symmetrize(
                blas.dsyrk(1.0, bed.val[fitIndividuals, :]**2, lower=1))
            sER2 = (temp.sum() - np.diag(temp).sum()) / bed.val.shape[1]
            shrinkParam = (sER2 - sE2R) / (sE2R * (bed.val.shape[1] - 1))
        if (shrinkParam > 0):
            print('shrinkage estimator:', 1 - shrinkParam)
            XXT = (1 - shrinkParam) * XXT + bed.val.shape[
                1] * shrinkParam * np.eye(XXT.shape[0])

    return XXT
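The 'cv' branch above cross-validates the shrinkage intensity; a self-contained sketch of the same idea with the modern import path (ShrunkCovariance.score, the model log-likelihood, is what GridSearchCV maximizes):

import numpy as np
from sklearn.covariance import ShrunkCovariance
from sklearn.model_selection import GridSearchCV

X = np.random.randn(200, 10)  # hypothetical data: samples x features
grid = GridSearchCV(ShrunkCovariance(assume_centered=True),
                    {'shrinkage': [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]})
grid.fit(X)
print('shrinkage chosen by CV:', grid.best_params_['shrinkage'])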
Example #10
def covar_matrix(X, method="hist", d=0.94, **kwargs):
    r"""
    Calculate the covariance matrix using the selected method.
    
    Parameters
    ----------
    X : DataFrame of shape (n_samples, n_features)
        Features matrix, where n_samples is the number of samples and 
        n_features is the number of features.    
    method : str, can be {'hist', 'ewma1', 'ewma2', 'ledoit', 'oas' or 'shrunk'}
        The default is 'hist'. The method used to estimate the covariance matrix:
        
        - 'hist': use historical estimates.
        - 'ewma1': use ewma with adjust=True, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ewma2': use ewma with adjust=False, see `EWM <https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows>`_ for more details.
        - 'ledoit': use the Ledoit and Wolf Shrinkage method.
        - 'oas': use the Oracle Approximating Shrinkage method.
        - 'shrunk': use the basic Shrunk Covariance method.
    d : scalar
        The smoothing factor of ewma methods.
        The default is 0.94.            
    **kwargs:
        Other variables related to covariance estimation. See
        `Scikit Learn <https://scikit-learn.org/stable/modules/covariance.html>`_
        for more details.
    
    Returns
    -------
    cov : nd-array
        The estimation of covariance matrix.
        
    Raises
    ------
    ValueError
        When the value cannot be calculated.
        
    """

    if not isinstance(X, pd.DataFrame):
        raise ValueError("X must be a DataFrame")

    assets = X.columns.tolist()

    if method == "hist":
        cov = np.cov(X.T)
    elif method == "ewma1":
        cov = X.ewm(alpha=1 - d).cov()
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ewma2":
        cov = X.ewm(alpha=1 - d, adjust=False).cov()
        item = cov.iloc[-1, :].name[0]
        cov = cov.loc[(item, slice(None)), :]
    elif method == "ledoit":
        lw = skcov.LedoitWolf(**kwargs)
        lw.fit(X)
        cov = lw.covariance_
    elif method == "oas":
        oas = skcov.OAS(**kwargs)
        oas.fit(X)
        cov = oas.covariance_
    elif method == "shrunk":
        sc = skcov.ShrunkCovariance(**kwargs)
        sc.fit(X)
        cov = sc.covariance_

    cov = pd.DataFrame(np.array(cov, ndmin=2), columns=assets, index=assets)

    return cov
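A usage sketch with a hypothetical returns DataFrame; extra keyword arguments are forwarded to the scikit-learn estimator (this assumes the module-level pd/np/skcov imports that covar_matrix relies on):

import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.randn(250, 4) / 100,
                 columns=['A', 'B', 'C', 'D'])
cov_shrunk = covar_matrix(X, method='shrunk', shrinkage=0.1)
cov_ledoit = covar_matrix(X, method='ledoit')
cov_ewma = covar_matrix(X, method='ewma1', d=0.94)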
def computeIndependentExCls(descr,
                            neg_desc,
                            the_cls,
                            outputfolder=None,
                            suffix='_ecls.pkl.gz',
                            parallel=True,
                            nprocs=None,
                            resampling=0,
                            files=None,
                            load=False,
                            return_none=False,
                            n_cls=-1):
    """
    compute for each descr an exemplar classifier using the descr. of 
    <neg_desc> as negatives, optionally save the classifiers
    """
    print('=> compute independent e-cls')
    if files is not None: assert (len(files) == len(descr))
    print(outputfolder, len(files) if files else '', suffix, load)

    if isinstance(the_cls, LDA):
        fname = os.path.join(outputfolder, 'covinv.pkl.gz')
        if load and os.path.exists(fname):
            cov_inv = pc.load(fname)
        else:
            #            cc = covariance.GraphLassoCV()
            cc = covariance.ShrunkCovariance()
            #            cc = covariance.LedoitWolf()
            #            cc = covariance.OAS()
            #            cc = covariance.MinCovDet()
            cc.fit(neg_desc)
            cov_inv = cc.precision_

            #            covar = np.cov(neg_desc.T, bias=1)
            #            # regularize
            #            covar[np.diag_indices(len(covar))] += 0.01
            #            cov_inv = np.linalg.inv(covar)
            pc.dump(fname, cov_inv, verbose=False)
        print('| elda: cov_inv.shape:', cov_inv.shape)
        mean = np.mean(neg_desc, axis=0)
        zero_mean = descr - mean

    if n_cls is not None and n_cls > 0:
        indices = np.random.choice(len(neg_desc),
                                   min(len(neg_desc), n_cls),
                                   replace=False)
        neg_desc = neg_desc[indices]
        print('choose to use {} neg-descr'.format(len(neg_desc)))

    widgets = [
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    progress = progressbar.ProgressBar(widgets=widgets, maxval=len(descr))

    def createEx(i):
        #        print 'all.shape:', descr.shape, 'one:', descr[i].shape
        fname = ''
        if outputfolder is not None and files is not None:
            if files[i].endswith('.pkl.gz'):
                fname = files[i].replace('.pkl.gz', suffix)
            else:
                fname = os.path.splitext(files[i])[0] + suffix
            fname = os.path.join(outputfolder, os.path.basename(fname))

        if load and fname != '' and os.path.exists(fname):
            try:
                cls = pc.load(fname)
                assert (cls.__class__.__name__ == the_cls.__class__.__name__)
                progress.update(i + 1)
                if return_none: return None
                return cls
            except:  # e.g. EOFError most of the time
                print('Warning: could not load {} -> recompute'.format(fname))


#        print 'compute cls for', os.path.basename(files[i])

        if isinstance(the_cls, LDA):
            cls = copy.deepcopy(the_cls)
            w = cov_inv.dot(zero_mean[i].T)
            cls.coef_ = w.reshape(1, -1)
            cls.intercept_ = 0  #np.zeros( (cls.coef_.shape[0],1) )
        else:
            cls = exemplar_cls.createExemplarCls(descr[i].reshape(1, -1),
                                                 neg_desc, the_cls, resampling)
        if fname != '':
            pc.dump(fname, cls, verbose=False)
        progress.update(i + 1)
        if return_none: return None
        return cls

    progress.start()
    if parallel:
        ex_cls = pc.parmap(createEx, range(len(descr)), nprocs=nprocs)
    else:
        ex_cls = list(map(createEx, range(len(descr))))  # list() so the work runs eagerly on Python 3
    progress.finish()

    print('[Done]')
    return ex_cls