def cov(self):
    """Return the covariance matrix of the returns as a labelled DataFrame.

    The mapping produced by ``self.returns()`` is read once; its keys label
    both the rows and the columns of the result, and its values are stacked
    (in key order) into the array handed to the module-level ``cov``
    function. Inside this body the name ``cov`` resolves to that global
    function, not to this method.
    """
    returns_by_symbol = self.returns()
    labels = returns_by_symbol.keys()
    # One entry per symbol, in the same order as the labels.
    stacked = [returns_by_symbol[label] for label in labels]
    return DataFrame(cov(array(stacked)), index=labels, columns=labels)
def cov(self):
    """Return the covariance matrix of the returns, ordered by asset column.

    The symbols are taken from the columns of ``self.asset['adj_close']`` so
    that rows and columns of the result follow that ordering. For every
    symbol the ``.values`` of the matching entry of ``self.returns()`` is
    used as one row of the array passed to the module-level ``cov`` function
    (the name ``cov`` in this body is that global function, not this method).
    """
    returns_by_symbol = self.returns()
    symbols = self.asset['adj_close'].columns
    rows = [returns_by_symbol[symbol].values for symbol in symbols]
    return DataFrame(cov(array(rows)), index=symbols, columns=symbols)
def princomps(data, numprincomps):
    """Project column-wise data onto its leading whitened principal components.

    Parameters
    ----------
    data : 2-D array whose columns are the samples (one row per variable).
    numprincomps : number of components to keep; the components belonging to
        the largest eigenvalues of the covariance matrix are selected.

    Returns
    -------
    V : array of shape (numprincomps, n_variables). Each row is an
        eigenvector of the (biased) covariance matrix divided by the square
        root of its eigenvalue, i.e. a whitening PCA transform rather than
        the bare eigenvectors.
    codes : ``dot(V, data - mean)``, the low-dimensional representation of
        every sample. Because of the scaling each code row has unit variance.

    Notes
    -----
    A zero eigenvalue (rank-deficient data) leads to a division by zero in
    the scaling, exactly as in the unscaled formula ``u ** -0.5``.
    """
    # numpy's cov is the function pylab re-exports; importing it directly
    # keeps this numeric helper independent of the plotting stack.
    from numpy import cov
    from numpy.linalg import eigh

    # Per-variable mean, kept as a column so it broadcasts over the samples.
    m = data.mean(1)[:, newaxis]
    centered = data - m

    # eigh returns the eigenvalues in ascending order, so the trailing
    # entries/columns belong to the largest eigenvalues.
    u, v = eigh(cov(centered, rowvar=True, bias=True))
    V = ((u ** (-0.5))[-numprincomps:][newaxis, :] * v[:, -numprincomps:]).T
    return V, dot(V, centered)
def princomps(data, numprincomps):
    """Compute the leading whitened principal components of ``data``.

    ``data`` holds one sample per column. The function centres the data,
    diagonalises its biased covariance matrix and keeps the ``numprincomps``
    directions with the largest eigenvalues.

    Returns a pair ``(V, codes)``:

    * ``V`` has shape (numprincomps, n_variables); every row is an
      eigenvector scaled by ``eigenvalue ** -0.5`` (a whitening transform,
      not just the eigenvectors themselves).
    * ``codes`` is ``dot(V, data - mean)``, the low-dimensional code of each
      sample; each row of it has unit variance as a result of the scaling.

    Eigenvalues equal to zero make the scaling divide by zero.
    """
    # Import the numeric routines from numpy itself (pylab merely re-exports
    # numpy's cov), so no plotting package is needed for this computation.
    from numpy import cov
    from numpy.linalg import eigh

    mean_column = data.mean(1)[:, newaxis]
    deviations = data - mean_column

    # Ascending eigenvalue order: the last columns are the principal axes.
    eigenvalues, eigenvectors = eigh(cov(deviations, rowvar=True, bias=True))
    scale = (eigenvalues ** (-0.5))[-numprincomps:][newaxis, :]
    V = (scale * eigenvectors[:, -numprincomps:]).T
    return V, dot(V, deviations)
def __init__(self, symbols, start=None, end=None, bench='^GSPC'):
    """Download the assets and a benchmark and derive per-asset statistics.

    Parameters
    ----------
    symbols : a single symbol or a list of symbols to download.
    start, end : passed unchanged to ``DataReader`` as the date window.
    bench : symbol of the benchmark asset.

    Side effects
    ------------
    ``self.asset`` maps every successfully downloaded symbol to its data,
    extended with the columns 'Return', 'Beta', 'Alpha' and 'Sharpe'.
    ``self.benchmark`` holds the benchmark data with a 'Return' column.
    'Return' is the row-to-row difference of 'Adj Close'.

    A symbol that cannot be downloaded is reported on stdout and skipped;
    a failing benchmark download propagates its exception.
    """
    # Accept a single symbol as well as a list of symbols.
    if not isinstance(symbols, list):
        symbols = [symbols]

    # Dictionary holding the downloaded data of every asset, keyed by symbol.
    self.asset = {}

    # Retrieve the assets from the data source (Yahoo).
    for symbol in symbols:
        try:
            self.asset[symbol] = DataReader(
                symbol, "yahoo", start=start, end=end)
        except Exception:
            # Best effort: report the symbol and carry on with the others.
            print("Asset " + str(symbol) + " not found!")

    # Benchmark asset and its returns.
    self.benchmark = DataReader(bench, "yahoo", start=start, end=end)
    self.benchmark['Return'] = self.benchmark['Adj Close'].diff()
    bench_return = self.benchmark['Return'].fillna(0)

    # Only the symbols that were actually downloaded are processed; looping
    # over the requested list would raise KeyError for a missing asset.
    for symbol in list(self.asset):
        data = self.asset[symbol]

        # Returns (the first row is NaN and is replaced by 0 for statistics).
        data['Return'] = data['Adj Close'].diff()
        asset_return = data['Return'].fillna(0)

        # Beta: covariance with the benchmark over the benchmark variance.
        cov_matrix = cov(asset_return, bench_return)
        data['Beta'] = cov_matrix[0, 1] / cov_matrix[1, 1]

        # Alpha: return not explained by the benchmark.
        data['Alpha'] = data['Return'] - data['Beta'] * self.benchmark['Return']

        # Sharpe ratio scaled by the square root of the number of rows.
        data['Sharpe'] = \
            sqrt(len(asset_return)) * mean(asset_return) / std(asset_return)
def __init__(self, X, c):
    """Estimate mean, covariance and prior probability of three classes.

    Parameters
    ----------
    X : 2-D array of shape (n, N); every column is one sample.
    c : class label of every column of X. The three classes are labelled
        1.0, 2.0 and 3.0.

    Attributes set
    --------------
    n, N : the two dimensions of X.
    X : the data matrix itself.
    mu : array (3, n), per-class mean vector.
    cov : array (3, n, n), per-class covariance matrix.
    P : array (3,), fraction of the N samples that belongs to each class.
    """
    self.n, self.N = X.shape
    self.X = X
    self.mu = empty((3, self.n))
    self.cov = empty((3, self.n, self.n))
    self.P = empty(3)

    cond = zeros(self.N)
    for i in range(3):
        # cond is a length-N vector filled with the current label i + 1.0.
        cond = cond + 1.0

        # Xa holds every column of X whose class label equals i + 1.0.
        Xa = X[:, where(c == cond)[0]]
        Na = shape(Xa)[1]

        self.mu[i] = mean(Xa, axis=1)

        # cov() centres every row itself. The former manual centring with
        # tile(mu, Na).reshape(n, Na) interleaved the mean components instead
        # of repeating each one along its row, distorting the result.
        self.cov[i] = cov(Xa)

        # Prior probability of this class.
        self.P[i] = (Na * 1.0) / self.N
# Tail of a fitting routine whose header lies outside this excerpt: r is the
# fit result, with the fitted parameters at index 0 and a matrix at index 1.
theta = r[0]
cov_x = r[1]
# The matrix is scaled by noise**2 before it is handed back.
return theta, cov_x * (noise**2)
# print cov_x


def plott():
    """Plot the chi-square of one noisy data set for parameter values 25..34.

    Uses the surrounding names f, x, theta_0 and noise and draws into a new
    figure of the plotting module p.
    """
    # NOTE(review): no size is passed to normal(), so a single scalar draw is
    # added to every element of f(x, theta_0) -- confirm this is intended.
    y = f(x, theta_0) + numpy.random.normal(scale=noise)
    # Sum of squared residuals in units of the noise variance.
    chi2 = lambda theta: sum((f(x, theta) - y)**2) / noise**2
    p.figure()
    tval = p.arange(25, 35, 1)
    p.plot(tval, [chi2(t) for t in tval], '-')


# NOTE(review): the excerpt lost its indentation; the report below is assumed
# to be module-level code rather than part of plott() -- confirm.
# Every statistic calls estimate() rep times anew, so each one is computed
# from its own batch of fits.
print("original parameters: ", theta_0)
print("mean fit values: ", p.mean([estimate()[0] for _ in range(rep)], axis=0))
print()
print("mean fit parameter deviation:", p.std([estimate()[0] for _ in range(rep)], axis=0))
print()
# Square root of the diagonal of the returned matrix, averaged over the fits.
print("mean deviation estimate: ", p.mean([p.sqrt(p.diag(estimate()[1])) for _ in range(rep)], axis=0))
print()
print("fit parameter covariances:")
# rowvar=0: every fit is one observation (row), every parameter one variable.
print(p.cov([estimate()[0] for _ in range(rep)], rowvar=0))
print()
print("mean covariance matrix: ")
print(p.mean([estimate()[1] for _ in range(rep)], axis=0))
def mvn(model, disease, param_type_list, country, sex, year, iter, burn, thin, rate_type_list):
    """Fit a country-sex-year specific model with multivariate normal priors.

    model : data.Model()
    disease : int, model number
    param_type_list : list of str, 'i', 'r', 'f', or 'p'
    country : str, ISO3 code
    sex : str, 'male', 'female', or 'total'
    year : int, 1990, 2005, 2010
    iter : int, passed on to the fitting routine
    burn : int, passed on to the fitting routine
    thin : int, passed on to the fitting routine
    rate_type_list : list of str such as 'neg_binom' or 'binom'; either a
        single entry used for every parameter type, or one entry per element
        of param_type_list

    For every parameter type the empirical prior draws are fetched with
    get_emp, and a potential named 'parent_similarity_<type>' ties the
    model's 'mu_age' to the mean and covariance of those draws. The fitted
    model is returned with the draws stored in ``model.priors``.
    """
    several_types = len(param_type_list) > 1

    # the rate types must come as one shared entry or one entry per parameter
    assert len(rate_type_list) in (1, len(param_type_list)), 'rate_type_list has the incorrect number of arguments--length must be 1 or match length of param_type_list'

    if len(rate_type_list) > 1:
        # several rate types: dictionary keyed by data_type
        rate_type = {data_type: rate_type_list[i]
                     for i, data_type in enumerate(param_type_list)}
    else:
        # a single rate type is handed to ism.py as a plain string
        rate_type = rate_type_list[0]

    # empirical priors, plus random and fixed effects, per data_type
    priors = {}
    for data_type in param_type_list:
        priors[data_type] = get_emp(disease, data_type, country, sex, year)
        find_fnrfx(model, disease, data_type, country, sex, year)

    # model variables (data_type is the last, i.e. only, entry in the
    # single-parameter branch)
    if several_types:
        model.vars += dismod3.ism.consistent(model, country, sex, year, rate_type=rate_type)
    else:
        model.vars += dismod3.ism.age_specific_rate(model, data_type, country, sex, year, rate_type=rate_type)

    # one multivariate normal potential per data_type
    for data_type in param_type_list:
        pred_rate = pl.array(priors[data_type])
        mu = pred_rate.mean(1)
        C = pl.cov(pred_rate)

        # knots where the prediction has no spread at all are left out
        mesh = model.parameters[data_type]['parameter_age_mesh']
        knots = [k_i for k_i in mesh if pred_rate[k_i, :].std() > 0]

        # the default arguments bind this iteration's values to the potential
        @mc.potential(name='parent_similarity_%s' % data_type)
        def parent_similarity(x=model.vars[data_type]['mu_age'], mu=mu, C=C, knots=knots):
            return mc.mv_normal_cov_like(x[knots], mu[knots], C[:, knots][knots, :])

        model.vars[data_type]['parent_similarity'] = parent_similarity

    # fit
    if several_types:
        dismod3.fit.fit_consistent(model, iter=iter, thin=thin, burn=burn)
    else:
        dismod3.fit.fit_asr(model, data_type, iter=iter, burn=burn, thin=thin)

    model.priors = priors
    return model
def mvn(model, disease, param_type_list, country, sex, year, iter, burn, thin, rate_type_list):
    """Multivariate normal country-sex-specific fit.

    model : data.Model()
    disease : int, model number
    param_type_list : list of str, 'i', 'r', 'f', or 'p'
    country : str, ISO3 code
    sex : str, 'male', 'female', or 'total'
    year : int, 1990, 2005, 2010
    iter, burn, thin : int, forwarded to the dismod3 fitting call
    rate_type_list : list of str (e.g. 'neg_binom', 'binom') of length 1, or
        with the same length as param_type_list

    Returns ``model`` after adding its variables, attaching a
    'parent_similarity' potential per parameter type, fitting it, and
    storing the empirical prior draws in ``model.priors``.
    """
    n_params = len(param_type_list)
    n_rates = len(rate_type_list)

    # check that the arguments are consistent with each other
    assert n_rates in (1, n_params), 'rate_type_list has the incorrect number of arguments--length must be 1 or match length of param_type_list'

    # ism.py expects a dictionary for several rate types, a string for one
    if n_rates > 1:
        rate_type = {data_type: rate_type_list[position]
                     for position, data_type in enumerate(param_type_list)}
    else:
        rate_type = rate_type_list[0]

    # collect the prior of every data_type and set its RE and FE
    priors = {}
    for data_type in param_type_list:
        priors[data_type] = get_emp(disease, data_type, country, sex, year)
        find_fnrfx(model, disease, data_type, country, sex, year)

    # add vars; with one parameter type data_type still names that type
    if n_params > 1:
        new_vars = dismod3.ism.consistent(model, country, sex, year, rate_type=rate_type)
    else:
        new_vars = dismod3.ism.age_specific_rate(model, data_type, country, sex, year, rate_type=rate_type)
    model.vars += new_vars

    # add the mc.potential linking mu_age to the prior draws
    for data_type in param_type_list:
        pred_rate = pl.array(priors[data_type])
        mu = pred_rate.mean(1)
        C = pl.cov(pred_rate)

        # a knot with zero spread would give the prior zero probability,
        # so only knots with positive standard deviation are used
        knots = [k_i
                 for k_i in model.parameters[data_type]['parameter_age_mesh']
                 if pred_rate[k_i, :].std() > 0]

        # defaults freeze the current mu, C and knots for this potential
        @mc.potential(name='parent_similarity_%s' % data_type)
        def parent_similarity(x=model.vars[data_type]['mu_age'], mu=mu, C=C, knots=knots):
            return mc.mv_normal_cov_like(x[knots], mu[knots], C[:, knots][knots, :])

        model.vars[data_type]['parent_similarity'] = parent_similarity

    if n_params > 1:
        dismod3.fit.fit_consistent(model, iter=iter, thin=thin, burn=burn)
    else:
        dismod3.fit.fit_asr(model, data_type, iter=iter, burn=burn, thin=thin)

    model.priors = priors
    return model
def main():
    """Study how sample mean and covariance estimates vary with sample size.

    Part one draws, for N = 50, 100, ..., 1950, 200 data sets of N samples
    from a 4-dimensional normal distribution with the given mu and Sigma,
    and plots the average standard deviation of the estimated mean and of
    the estimated covariance entries against N (saved as 'fig22.png').

    Part two draws 200 data sets of 1000 samples, places their covariance
    matrices side by side and prints the covariance of that matrix.
    """
    mu = pl.array([[2], [8], [16], [32]])
    Sigma = pl.array([[3.01602775, 1.02746769, -3.60224613, -2.08792829],
                      [1.02746769, 5.65146472, -3.98616664, 0.48723704],
                      [-3.60224613, -3.98616664, 13.04508284, -1.59255406],
                      [-2.08792829, 0.48723704, -1.59255406, 8.28742469]])

    # A = U * sqrt(L) turns standard normal samples into samples with
    # covariance Sigma, since A * A^T = U * L * U^T.
    d, U = pl.eig(Sigma)
    L = pl.diagflat(d)
    A = pl.dot(U, pl.sqrt(L))

    # Number of data sets per setting; the plot title quotes this figure
    # (the former range(1, 200) only ran 199 times).
    repetitions = 200

    N = []
    mu_deviations = []
    Sigma_deviations = []

    # First part of the exercise.
    # This loop is used to get different sizes of N.
    for i in range(1, 40):
        n_samples = 50 * i
        N.append(n_samples)
        means = []
        covariances = []

        # The estimates of many data sets are collected so that their spread
        # can be measured accurately.
        for _ in range(repetitions):
            X = pl.randn(4, n_samples)
            Y = pl.dot(A, X) + pl.tile(mu, n_samples)
            means.append(pl.mean(Y, axis=1))
            # Flatten the 4x4 matrix so that every entry becomes a column.
            covariances.append(pl.cov(Y).reshape(16))

        # The spread of the means belongs to mu, the spread of the covariance
        # entries to Sigma (the two used to be stored the wrong way round).
        mu_deviations.append(pl.mean(pl.std(means, axis=0)))
        Sigma_deviations.append(pl.mean(pl.std(covariances, axis=0)))

    pl.figure(1)
    pl.clf()
    pl.title('The average deviation, over 200 times,\n of the mean'
             ' and covariance matrix for a given N')
    pl.xlabel('N')
    pl.ylabel('average deviation')
    pl.plot(N, mu_deviations, label='average mean deviation')
    pl.plot(N, Sigma_deviations, label='average covariance deviation')
    pl.legend()
    pl.savefig('fig22.png')

    # Second part of the exercise.
    # The covariance matrices of many data sets are placed side by side to
    # form a data matrix of covariances; the covariance of that data matrix
    # is shown.
    blocks = []
    for _ in range(repetitions):
        X = pl.randn(4, 1000)
        Y = pl.dot(A, X) + pl.tile(mu, 1000)
        blocks.append(pl.cov(Y))
    covariance_data = pl.cov(pl.hstack(blocks))
    print(covariance_data)
def __init__(self, symbols, start=None, end=None, bench='^GSPC'):
    """Download assets with enough history and derive per-asset statistics.

    Parameters
    ----------
    symbols : a single symbol or a list of symbols to download.
    start, end : date window, passed unchanged to ``DataReader``.
    bench : symbol of the benchmark asset.

    Side effects
    ------------
    ``self.asset`` maps every retained symbol to its data, restricted to the
    dates shared with the benchmark and extended with the columns 'Return',
    'Beta', 'Alpha' and 'Sharpe'. ``self.benchmark`` holds the benchmark
    restricted to the dates shared with all retained assets, and
    ``self.dates_to_consider`` is its final index. Progress is logged.

    An asset is retained when it has more rows than 90% of the business days
    between start and end. Assets that fail to download, or are too short,
    are logged and skipped; a failing benchmark download propagates.
    """
    logger.info('Get optimal allocation: class Portfolio')

    # Accept a single symbol as well as a list of symbols.
    if not isinstance(symbols, list):
        symbols = [symbols]

    # Dictionary holding the data of every retained asset, keyed by symbol.
    self.asset = {}

    # Minimum number of rows an asset needs. The business-day count needs
    # both ends of the window, so with an open-ended window (the default)
    # any non-empty history is accepted instead of failing here.
    if start is None or end is None:
        nb_of_days_considered_min = 0
    else:
        nb_of_days_considered_min = int(busday_count(start, end) * .9)

    # Retrieve the assets from the data source (Yahoo).
    logger.info('Get optimal allocation: get assets historic')
    symbols_considered = []
    for symbol in tqdm(symbols):
        try:
            historical_data = DataReader(symbol, "yahoo", start=start, end=end)
            if nb_of_days_considered_min < historical_data.shape[0]:
                logger.info(
                    f'Get optimal allocation: Select asset with {nb_of_days_considered_min} days min - Asset: {symbol} with {historical_data.shape[0]} days')
                self.asset[symbol] = historical_data
                symbols_considered.append(symbol)
            else:
                logger.info(f'Get optimal allocation: Asset {symbol}.')
                logger.info('Error: Not enough historic')
        except Exception as e:
            # Best effort: log the failure and carry on with the others.
            logger.info(f'Get optimal allocation: Asset {symbol} not found.')
            logger.info(f'Error: {e}')
    logger.info(f'Get optimal allocation: {len(symbols)} symbols (assets)')

    # Benchmark asset and its returns.
    self.benchmark = DataReader(bench, "yahoo", start=start, end=end)
    self.benchmark['Return'] = self.benchmark['Adj Close'].diff()

    # Returns, beta, alpha and Sharpe ratio of every retained asset.
    total = len(symbols_considered)
    for iteration, symbol in enumerate(symbols_considered, start=1):
        logger.info('---------')
        logger.info(f'Get optimal allocation: Iteration: {iteration}/{total} ({datetime.datetime.today()})')
        logger.info(f'Get optimal allocation: Symbol consider: {symbol}')

        # Keep only the dates present in both the benchmark and this asset,
        # so that the two return series have equal length below.
        self.benchmark = self.benchmark.loc[self.benchmark.index.intersection(self.asset[symbol].index)]
        self.asset[symbol] = self.asset[symbol].loc[self.asset[symbol].index.intersection(self.benchmark.index)]
        data = self.asset[symbol]
        logger.info(
            f'Get optimal allocation: {symbol} shape: {data.shape}, bench shape: {self.benchmark.shape}')

        # Returns (the first row is NaN and is replaced by 0 for statistics).
        data['Return'] = data['Adj Close'].diff()
        asset_return = data['Return'].fillna(0)
        bench_return = self.benchmark['Return'].fillna(0)

        # Beta: covariance with the benchmark over the benchmark variance.
        cov_matrix = cov(asset_return, bench_return)
        data['Beta'] = cov_matrix[0, 1] / cov_matrix[1, 1]

        # Alpha: return not explained by the benchmark.
        data['Alpha'] = data['Return'] - data['Beta'] * self.benchmark['Return']

        # Sharpe ratio scaled by the square root of the number of rows.
        data['Sharpe'] = \
            sqrt(len(asset_return)) * mean(asset_return) / std(asset_return)

    self.dates_to_consider = self.benchmark.index
theta = r[0] cov_x = r[1] return theta, cov_x * (noise**2) # print cov_x def plott(): y = f(x, theta_0) + numpy.random.normal(scale=noise) chi2 = lambda theta: sum((f(x, theta) - y)**2) / noise**2 p.figure() tval = p.arange(25, 35, 1) p.plot(tval, [chi2(t) for t in tval], '-') print "original parameters: ", theta_0 print "mean fit values: ", p.mean( [estimate()[0] for _ in xrange(rep)], axis=0) print print "mean fit parameter deviation:", p.std( [estimate()[0] for _ in xrange(rep)], axis=0) print print "mean deviation estimate: ", p.mean( [p.sqrt(p.diag(estimate()[1])) for _ in xrange(rep)], axis=0) print print "fit parameter covariances:" print p.cov([estimate()[0] for _ in xrange(rep)], rowvar=0) print print "mean covariance matrix: " print p.mean([estimate()[1] for _ in xrange(rep)], axis=0)
def cov(self):
    """Return the covariance matrix of the returns as a labelled DataFrame.

    ``self.returns()`` is expected to give a mapping from label to a return
    series. It is evaluated once, so labels and data always stem from the
    same result. The labels are used for both the rows and the columns.

    The keys and values are materialised as lists: handing a dictionary view
    straight to ``array`` produces a 0-d object array instead of a matrix.
    Inside this body the name ``cov`` resolves to the module-level
    covariance function, not to this method.
    """
    returns_by_label = self.returns()
    labels = list(returns_by_label.keys())
    series = list(returns_by_label.values())
    return DataFrame(cov(array(series)), index=labels, columns=labels)
def computeSampleSlope(sample, yfunc):
    """Return the slope of yfunc(frame) against the planar distance.

    For every frame in ``sample["frames"]`` the x value is the norm of the
    first two components of ``frame["position"]`` and the y value is
    ``yfunc(frame)``. The slope is the covariance of x and y divided by the
    variance of x.
    """
    frames = sample["frames"]
    distances = [pl.norm(frame["position"][:2]) for frame in frames]
    values = [yfunc(frame) for frame in frames]

    # 2x2 matrix: [0, 0] is the variance of x, [1, 0] the x-y covariance.
    covariance = pl.cov(distances, values)
    xy_covariance = covariance[1, 0]
    x_variance = covariance[0, 0]
    return xy_covariance / x_variance