Example 1
def cov(self):
    # Collect the return series of every asset, one row per symbol.
    tmp = self.returns()
    tmpl = []
    for symbol in tmp.keys():
        tmpl.append(tmp[symbol])
    # With rows as the variables, cov() gives the symbol-by-symbol covariance matrix.
    return DataFrame(
        cov(array(tmpl)), index=tmp.keys(), columns=tmp.keys())
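
The method above relies on numpy's cov() treating each row of a 2-D array as one variable. A minimal, self-contained sketch of the same idea (the symbols and return values below are invented for illustration):

import numpy as np
from pandas import DataFrame

# Invented daily returns, one row (variable) per symbol.
returns = {
    'AAPL': [0.010, -0.020, 0.005, 0.013],
    'MSFT': [0.008, -0.015, 0.002, 0.010],
    'GOOG': [-0.003, 0.004, 0.001, -0.002],
}
keys = list(returns.keys())
# Rows are variables by default, so cov() returns a 3x3 covariance matrix.
print(DataFrame(np.cov(np.array(list(returns.values()))),
                index=keys, columns=keys))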
Example 2
def cov(self):
    tmp = self.returns()
    tmpl = []
    # Iterate over the price columns so the matrix labels match the data order.
    for symbol in self.asset['adj_close'].columns:
        tmpl.append(tmp[symbol].values)
    return DataFrame(
        cov(array(tmpl)),
        index=self.asset['adj_close'].columns,
        columns=self.asset['adj_close'].columns)
Example 3
def princomps(data, numprincomps):
    """ Compute the first numprincomps principal components of the
        (columnwise represented) dataset data.
        Returns the transformation (first numprincomps eigenvectors as matrix)
        and the resulting low-dimensional codes.
    """
    from pylab import cov, newaxis, dot  # newaxis and dot were missing imports
    from numpy.linalg import eigh
    # Center the data and eigendecompose the (biased) covariance matrix.
    m = data.mean(1)[:, newaxis]
    u, v = eigh(cov(data - m, rowvar=1, bias=1))
    # eigh sorts eigenvalues in ascending order, so the last columns of v are
    # the leading directions. V whitens them (scales by 1/sqrt(eigenvalue));
    # W is the inverse (back-projection) transform and is never returned here.
    V = ((u**(-0.5))[-numprincomps:][newaxis, :] * v[:, -numprincomps:]).T
    W = ((u**(0.5))[-numprincomps:][newaxis, :] * v[:, -numprincomps:])
    return V, dot(V, data - m)
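
A quick way to sanity-check princomps is to run it on random data and confirm the shapes and the whitening property: since V scales each eigenvector by 1/sqrt(eigenvalue), the covariance of the returned codes should be close to the identity. A hedged sketch, assuming the function above is in scope:

import numpy as np

np.random.seed(0)
data = np.random.randn(10, 500)   # 10 features, 500 columnwise samples
V, codes = princomps(data, 3)
print(V.shape)       # (3, 10): one scaled eigenvector per row
print(codes.shape)   # (3, 500): a low-dimensional code per sample
# V whitens the leading directions, so the codes' covariance is near the identity.
print(np.cov(codes, bias=1).round(2))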
Example 4
    def __init__(self, symbols, start=None, end=None, bench='^GSPC'):

        # Make sure the input is a list.
        if not isinstance(symbols, list):
            symbols = [symbols]

        # Create dictionary to hold assets.
        self.asset = {}

        # Retrieve assets from data source (i.e., Yahoo).
        for symbol in symbols:
            try:
                self.asset[symbol] = DataReader(
                    symbol, "yahoo", start=start, end=end)
            except Exception:
                print("Asset " + str(symbol) + " not found!")

        # Get Benchmark asset.
        self.benchmark = DataReader(bench, "yahoo", start=start, end=end)
        self.benchmark['Return'] = self.benchmark['Adj Close'].diff()

        # Get returns, beta, alpha, and Sharpe ratio.
        for symbol in symbols:
            # Get returns.
            self.asset[symbol]['Return'] = \
                self.asset[symbol]['Adj Close'].diff()
            # Get Beta: cov(asset, benchmark) / var(benchmark).
            A = self.asset[symbol]['Return'].fillna(0)
            B = self.benchmark['Return'].fillna(0)
            C = cov(A, B)  # 2x2 covariance matrix of the two return series
            self.asset[symbol]['Beta'] = C[0, 1] / C[1, 1]
            # Get Alpha
            self.asset[symbol]['Alpha'] = self.asset[symbol]['Return'] - \
                self.asset[symbol]['Beta'] * self.benchmark['Return']

            # Get Sharpe Ratio
            tmp = self.asset[symbol]['Return']
            self.asset[symbol]['Sharpe'] = \
                sqrt(len(tmp)) * mean(tmp.fillna(0)) / std(tmp.fillna(0))
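
The Beta line uses the textbook estimator beta = cov(asset, benchmark) / var(benchmark), read off the 2x2 matrix that cov() returns for two 1-D series. A minimal sketch on synthetic returns (all numbers invented):

import numpy as np

np.random.seed(1)
bench = np.random.normal(0, 0.010, 250)                # benchmark daily returns
asset = 1.5 * bench + np.random.normal(0, 0.005, 250)  # true beta of 1.5
C = np.cov(asset, bench)    # 2x2: [[var(asset), cov], [cov, var(bench)]]
print(round(C[0, 1] / C[1, 1], 2))  # estimated beta, close to 1.5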
Example 5
    def __init__(self, X, c):
        self.n, self.N = X.shape
        self.X = X
        self.mu = empty((3, self.n))
        self.cov = empty((3, self.n, self.n))
        self.P = empty(3)
        cond = zeros(self.N)
        for i in range(0, 3):
            cond = cond + 1.0
            indices = where(c == cond)
            # Xa contains every column of X whose class label equals i + 1.0.
            Xa = [X[:, b] for b in indices]
            # where() wraps the samples in an extra array dimension, which we don't want.
            Xa = Xa[0]
            Na = shape(Xa)[1]

            self.mu[i] = mean(Xa, axis=1)
            # tile repeats mu so it can be subtracted from the whole Xa matrix.
            self.cov[i] = cov(Xa - tile(self.mu[i].T, Na).reshape(self.n, Na))

            # The prior probability of this class.
            self.P[i] = (Na * 1.0) / self.N
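
The constructor above fits one Gaussian per class: a mean vector, a covariance of the centered class samples, and a prior equal to the class frequency. The same estimate can be written more directly with a boolean mask, avoiding the extra dimension that where() introduces; a standalone sketch on invented data:

import numpy as np

np.random.seed(2)
X = np.random.randn(2, 300)                  # 2 features, 300 columnwise samples
c = np.random.choice([1.0, 2.0, 3.0], 300)   # one class label per sample
for i in range(3):
    Xa = X[:, c == i + 1.0]      # boolean mask: no extra dimension to unwrap
    mu = Xa.mean(axis=1)
    Sigma = np.cov(Xa)           # rows are the variables by default
    P = Xa.shape[1] / X.shape[1]
    print(i + 1, mu.round(2), round(P, 2))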
Example 6
    # Tail of estimate(): unpack the fit result and rescale the fractional
    # covariance cov_x by the noise variance.
    theta = r[0]
    cov_x = r[1]
    return theta, cov_x * (noise**2)


def plott():
    y = f(x, theta_0) + numpy.random.normal(scale=noise)
    chi2 = lambda theta: sum((f(x, theta) - y)**2) / noise**2
    p.figure()
    tval = p.arange(25, 35, 1)
    p.plot(tval, [chi2(t) for t in tval], '-')


print("original parameters:         ", theta_0)
print("mean fit values:             ",
      p.mean([estimate()[0] for _ in range(rep)], axis=0))
print()
print("mean fit parameter deviation:",
      p.std([estimate()[0] for _ in range(rep)], axis=0))
print()
print("mean deviation estimate:     ",
      p.mean([p.sqrt(p.diag(estimate()[1])) for _ in range(rep)], axis=0))
print()
print("fit parameter covariances:")
print(p.cov([estimate()[0] for _ in range(rep)], rowvar=0))
print()
print("mean covariance matrix:      ")
print(p.mean([estimate()[1] for _ in range(rep)], axis=0))
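
The rowvar=0 argument in the p.cov call is the key detail: each call to estimate() contributes one row of fitted parameters, so the columns, not the rows, are the variables. A toy sketch of that stacking, with estimate() replaced by an invented two-parameter draw:

import numpy as np

np.random.seed(3)
# Stand-in for [estimate()[0] for _ in range(rep)]: 100 rows, 2 fitted parameters each.
fits = np.random.multivariate_normal([30.0, 1.2],
                                     [[0.4, 0.1], [0.1, 0.05]], size=100)
# rowvar=0: columns (the parameters) are the variables, rows are observations.
print(np.cov(fits, rowvar=0))  # ~2x2 parameter covariance matrix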
Example 7
def mvn(model, disease, param_type_list, country, sex, year, iter, burn, thin,
        rate_type_list):
    ''' multivariate normal country-sex-specific fit
    model : data.Model()
    disease : int, model number
    param_type_list : list of str, 'i', 'r', 'f', or 'p'
    country : str, ISO3 code
    sex : str, 'male', 'female', or 'total'
    year : int, 1990, 2005, 2010
    iter : int,
    burn : int,
    thin : int,
    rate_type_list : list of str, length must equal len(param_type_list) or be 1, e.g. if len(param_type_list)==2: rate_type_list=['neg_binom', 'binom']
    '''

    # assert that system arguments are correct
    if len(rate_type_list) != 1:
        assert len(rate_type_list) == len(
            param_type_list
        ), 'rate_type_list has the incorrect number of arguments--length must be 1 or match length of param_type_list'

    # if there are multiple rate types, create a dictionary with keys corresponding to data_type
    if len(rate_type_list) > 1:
        rate_type = {}
        for i, data_type in enumerate(param_type_list):
            rate_type[data_type] = rate_type_list[i]
    # otherwise, change list to string to be correctly processed by ism.py
    else:
        rate_type = rate_type_list[0]
    # set priors
    priors = {}
    for data_type in param_type_list:
        # get prior for each data_type
        priors[data_type] = get_emp(disease, data_type, country, sex, year)
        # set RE and FE
        find_fnrfx(model, disease, data_type, country, sex, year)

    # add vars
    if len(param_type_list) > 1:
        model.vars += dismod3.ism.consistent(model,
                                             country,
                                             sex,
                                             year,
                                             rate_type=rate_type)
    else:
        # data_type here is the single entry left over from the loop above.
        model.vars += dismod3.ism.age_specific_rate(model,
                                                    data_type,
                                                    country,
                                                    sex,
                                                    year,
                                                    rate_type=rate_type)

    # add mc.potential terms tying each rate to its empirical prior
    for data_type in param_type_list:
        pred_rate = pl.array(priors[data_type])
        mu = pred_rate.mean(1)
        C = pl.cov(pred_rate)

        knots = []
        # Knots where the prediction has zero spread would give the prior zero
        # probability, so keep only knots with positive standard deviation.
        for k_i in model.parameters[data_type]['parameter_age_mesh']:
            if pred_rate[k_i, :].std() > 0:
                knots.append(k_i)

        @mc.potential(name='parent_similarity_%s' % data_type)
        def parent_similarity(x=model.vars[data_type]['mu_age'],
                              mu=mu,
                              C=C,
                              knots=knots):
            return mc.mv_normal_cov_like(x[knots], mu[knots],
                                         C[:, knots][knots, :])

        model.vars[data_type]['parent_similarity'] = parent_similarity

    if len(param_type_list) > 1:
        dismod3.fit.fit_consistent(model, iter=iter, thin=thin, burn=burn)
    else:
        dismod3.fit.fit_asr(model, data_type, iter=iter, burn=burn, thin=thin)

    model.priors = priors
    return model
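
In the pl.cov(pred_rate) step above, pred_rate holds one row per age and one column per prediction draw, so the result is an age-by-age covariance matrix for the multivariate normal prior. A stripped-down numpy sketch of just that step (the draws are invented and no pymc is involved):

import numpy as np

np.random.seed(4)
ages, draws = 5, 200
# Hypothetical stand-in for the get_emp() output: rows are ages, columns are draws.
pred_rate = np.abs(0.05 + 0.01 * np.random.randn(ages, draws))
mu = pred_rate.mean(1)   # mean predicted rate at each age
C = np.cov(pred_rate)    # ages x ages covariance across the draws
# Keep only knots where the draws actually vary, as the loop above does.
knots = [k for k in range(ages) if pred_rate[k, :].std() > 0]
print(mu.shape, C.shape, knots)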
Example 8
def main():
    mu = pl.array([[2], [8], [16], [32]])
    Sigma = pl.array([[3.01602775,  1.02746769, -3.60224613, -2.08792829],
                      [1.02746769,  5.65146472, -3.98616664,  0.48723704],
                      [-3.60224613, -3.98616664, 13.04508284, -1.59255406],
                      [-2.08792829,  0.48723704, -1.59255406,  8.28742469]])
    d, U = pl.eig(Sigma)
    L = pl.diagflat(d)
    A = pl.dot(U, pl.sqrt(L))

    N = []
    mu_deviations = []
    Sigma_deviations = []

    # First part of the exercise.
    # This loop is used to get different sizes of N.
    for i in range(1, 40):
        means = pl.array([])
        covariances = pl.array([])
        N.append(50 * i)
        # The average over 200 repetitions gives an accurate measurement.
        for _ in range(200):
            X = pl.randn(4, 50 * i)
            Y = pl.dot(A, X) + pl.tile(mu, 50 * i)
            mean = pl.mean(Y, axis=1)
            covariance = pl.cov(Y)
            covariance = covariance.reshape((1, 16))
            if (len(means) == 0 and len(covariances) == 0):
                means = mean
                covariances = covariance
            else:
                means = pl.vstack((means, mean))
                covariances = pl.vstack((covariances, covariance))
        mu_deviations.append(pl.mean(pl.std(means, axis=0)))
        Sigma_deviations.append(pl.mean(pl.std(covariances, axis=0)))

    pl.figure(1)
    pl.clf()
    pl.title('The average deviation, over 200 times,\n'
             'of the mean and covariance matrix for a given N')
    pl.xlabel('N')
    pl.ylabel('average deviation')
    pl.plot(N, mu_deviations, label='average mean deviation')
    pl.plot(N, Sigma_deviations, label='average covariance deviation')
    pl.legend()
    pl.savefig('fig22.png')

    # Second part of the exercise.
    covariances = pl.array([])

    # Each iteration draws one sample matrix from the multivariate normal
    # distribution and stacks its covariance into one wide data matrix;
    # the covariance of that matrix of covariances is then printed.
    for _ in range(200):
        X = pl.randn(4, 1000)
        Y = pl.dot(A, X) + pl.tile(mu, 1000)
        covariance = pl.cov(Y)
        if (len(covariances) == 0):
            covariances = covariance
        else:
            covariances = pl.hstack((covariances, covariance))
    covariance_data = pl.cov(covariances)
    print(covariance_data)
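
The construction Y = AX + mu works because when X has identity covariance, AX has covariance AA^T = ULU^T = Sigma. A short check of that identity on the same Sigma, assuming it is positive semidefinite as the exercise intends (eigh is used since Sigma is symmetric):

import numpy as np

np.random.seed(6)
Sigma = np.array([[ 3.01602775,  1.02746769, -3.60224613, -2.08792829],
                  [ 1.02746769,  5.65146472, -3.98616664,  0.48723704],
                  [-3.60224613, -3.98616664, 13.04508284, -1.59255406],
                  [-2.08792829,  0.48723704, -1.59255406,  8.28742469]])
d, U = np.linalg.eigh(Sigma)            # real spectrum: Sigma is symmetric
A = U @ np.diag(np.sqrt(d))             # A @ A.T reproduces Sigma
Y = A @ np.random.randn(4, 100000) + np.array([[2], [8], [16], [32]])
# The sample covariance converges to Sigma as the sample size grows.
print(np.abs(np.cov(Y) - Sigma).max())  # small residual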
Example 9
    def __init__(self, symbols, start=None, end=None, bench='^GSPC'):

        logger.info(f'Get optimal allocation: class Portfolio')

        # Make sure the input is a list.
        if not isinstance(symbols, list):
            symbols = [symbols]

        # Create dictionary to hold assets.
        self.asset = {}

        nb_of_days_considered_min = int(busday_count(start, end) * .9)

        # Retrieve assets from data source (i.e., Yahoo).
        logger.info(f'Get optimal allocation: get assets historic')
        symbols_considered = []
        for symbol in tqdm(symbols):
            try:
                historical_data = DataReader(symbol, "yahoo", start=start, end=end)
                if nb_of_days_considered_min < historical_data.shape[0]:
                    logger.info(
                        f'Get optimal allocation: Select asset with {nb_of_days_considered_min} days min - Asset: {symbol} with {historical_data.shape[0]} days')
                    self.asset[symbol] = historical_data
                    symbols_considered.append(symbol)
                else:
                    logger.info(f'Get optimal allocation: Asset {symbol} skipped.')
                    logger.info(f'Error: not enough history')
            except Exception as e:
                logger.info(f'Get optimal allocation: Asset {symbol} not found.')
                logger.info(f'Error: {e}')

        logger.info(f'Get optimal allocation: {len(symbols)} symbols (assets)')

        # Keep only considered symbols
        self.asset = {k: self.asset[k] for k in self.asset if k in symbols_considered}

        # Get Benchmark asset.
        self.benchmark = DataReader(bench, "yahoo", start=start, end=end)
        self.benchmark['Return'] = self.benchmark['Adj Close'].diff()

        # Get returns, beta, alpha, and Sharpe ratio.
        iteration = 1
        for symbol in symbols_considered:
            logger.info(f'---------')
            logger.info(f'Get optimal allocation: Iteration: {iteration}/{len(symbols_considered)} ({datetime.datetime.today()})')
            logger.info(f'Get optimal allocation: Symbol consider: {symbol}')
            self.benchmark = self.benchmark.loc[self.benchmark.index.intersection(self.asset[symbol].index)]
            self.asset[symbol] = self.asset[symbol].loc[self.asset[symbol].index.intersection(self.benchmark.index)]
            logger.info(
                f'Get optimal allocation: {symbol} shape: {self.asset[symbol].shape}, bench shape: {self.benchmark.shape}')

            # Get returns.
            self.asset[symbol]['Return'] = self.asset[symbol]['Adj Close'].diff()
            # Get Beta: cov(asset, benchmark) / var(benchmark).
            A = self.asset[symbol]['Return'].fillna(0)
            B = self.benchmark['Return'].fillna(0)

            C = cov(A, B)  # 2x2 covariance matrix of the two return series
            self.asset[symbol]['Beta'] = C[0, 1] / C[1, 1]

            # Get Alpha
            self.asset[symbol]['Alpha'] = self.asset[symbol]['Return'] - \
                                          self.asset[symbol]['Beta'] * self.benchmark['Return']

            # Get Sharpe Ratio
            tmp = self.asset[symbol]['Return']
            self.asset[symbol]['Sharpe'] = \
                sqrt(len(tmp)) * mean(tmp.fillna(0)) / std(tmp.fillna(0))
            iteration += 1

        self.dates_to_consider = self.benchmark.index
Example 10
def cov(self):
    returns = self.returns()
    # list() is needed in Python 3: array() on dict views builds a 0-d object array.
    keys, values = list(returns.keys()), list(returns.values())
    return DataFrame(
        cov(array(values)), index=keys, columns=keys)
Example 11
def computeSampleSlope(sample, yfunc):
    # Horizontal distance of each frame's position from the origin (x-y only).
    xs = [pl.norm(frame["position"][:2]) for frame in sample["frames"]]
    ys = [yfunc(frame) for frame in sample["frames"]]
    # Regression slope = cov(x, y) / var(x), read off the 2x2 covariance matrix.
    covarianceMat = pl.cov(xs, ys)
    return covarianceMat[1, 0] / covarianceMat[0, 0]
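
Since cov(x, y) / var(x) is exactly the least-squares slope, computeSampleSlope can be cross-checked against polyfit. A minimal sketch with invented data:

import numpy as np

np.random.seed(5)
xs = 10 * np.random.rand(50)
ys = 2.5 * xs + np.random.randn(50)
C = np.cov(xs, ys)
print(C[1, 0] / C[0, 0])         # slope from the covariance matrix
print(np.polyfit(xs, ys, 1)[0])  # the same slope from least squares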