def cov(self, cols=False, max_periods=False, decay=False, shrink=False, AR=False):
    if cols:
        if not isinstance(cols, list):
            cols = [cols]
        X = DataFrame(self[cols])
    else:
        X = DataFrame(self)
        cols = list(self.columns)
    if max_periods:
        X = X[-max_periods:]
    if AR:
        R = DataFrame(index=self.index, columns=cols)
        for col in cols:
            A = X[col]
            m = tsa.AR(array(A))
            f = m.fit(1)
            p = f.params
            R[col] = A - p[0] - p[1] * A.shift(1)
        R = R[1:]
        if decay:
            if (decay <= 0) or (decay >= 1):
                print 'Warning: The decay parameter is not between 0 and 1.'
            n = R.shape[0]
            vec = array(R[0:1])
            cov = vec.T.dot(vec)
            for i in arange(1, n):
                vec = array(R[i:i + 1])
                cov = decay * cov + (1 - decay) * vec.T.dot(vec)
            cov = DataFrame(cov, index=cols, columns=cols)
        else:
            cov = R.cov()
    elif decay:
        if (decay <= 0) or (decay >= 1):
            print 'Warning: The decay parameter is not between 0 and 1.'
        n = X.shape[0]
        vec = array(X[0:1])
        cov = vec.T.dot(vec)
        for i in arange(1, n):
            vec = array(X[i:i + 1])
            cov = decay * cov + (1 - decay) * vec.T.dot(vec)
        cov = DataFrame(cov, index=cols, columns=cols)
    else:
        if len(cols) == 1:
            cov = var(array(X))
        else:
            cov = X.cov()
    if shrink:
        if (shrink <= 0) or (shrink >= 1):
            print 'Warning: The shrinkage parameter is not between 0 and 1.'
        cov = ShrinkCovs(cov, delta=shrink)
    return DataFrame(cov, index=X.columns, columns=X.columns)
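# The `decay` branch above is an exponentially weighted (RiskMetrics-style)
# covariance estimate, S_t = decay * S_{t-1} + (1 - decay) * r_t r_t'.
# A minimal standalone sketch of the same recursion, assuming only numpy
# (`ewma_cov` is a hypothetical helper, not part of the snippet above):
import numpy as np

def ewma_cov(returns, decay=0.94):
    """Exponentially weighted covariance of an (n_obs, n_assets) array.

    decay closer to 1 means a longer memory; 0.94 is the classic
    RiskMetrics daily value.
    """
    r = returns[0].reshape(-1, 1)
    cov = r @ r.T
    for row in returns[1:]:
        r = row.reshape(-1, 1)
        cov = decay * cov + (1 - decay) * (r @ r.T)
    return cov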
def test_cov_numeric_only(self, numeric_only):
    # when the dtypes of the pandas Series differ, the ndarray will have
    # dtype=object, so it needs to be handled properly
    df = DataFrame({"a": [1, 0], "c": ["x", "y"]})
    expected = DataFrame(0.5, index=["a"], columns=["a"])
    if numeric_only:
        result = df.cov(numeric_only=numeric_only)
        tm.assert_frame_equal(result, expected)
    else:
        with pytest.raises(ValueError, match="could not convert string to float"):
            df.cov(numeric_only=numeric_only)
def test_cov_nullable_integer(self, other_column):
    # https://github.com/pandas-dev/pandas/issues/33803
    data = DataFrame({"a": pd.array([1, 2, None]), "b": other_column})
    result = data.cov()
    arr = np.array([[0.5, 0.5], [0.5, 1.0]])
    expected = DataFrame(arr, columns=["a", "b"], index=["a", "b"])
    tm.assert_frame_equal(result, expected)
def _get_initialized_parameter(
        data: pd.DataFrame,
        n_observed: int,
        n_latent: int,
        biased: bool = False) -> Dict[str, torch.nn.Parameter]:
    """Similar to lavaan's simple."""
    scale = 1.0
    if biased:
        scale = (data.shape[0] - 1.0) / data.shape[0]
    return {
        "lambda_y": torch.nn.Parameter(
            torch.ones((n_observed, n_latent), dtype=torch.double)),
        "beta": torch.nn.Parameter(
            torch.zeros((n_latent, n_latent), dtype=torch.double)),
        "psi": torch.nn.Parameter(torch.eye(n_latent, dtype=torch.double)),
        "theta": torch.nn.Parameter(
            torch.from_numpy(
                (data.cov().abs() * scale / 2.0).values
                * np.eye(n_observed)).double().clamp(min=0.1)),
        "alpha": torch.nn.Parameter(
            torch.zeros((n_latent, 1), dtype=torch.double)),
        "nu": torch.nn.Parameter(
            torch.from_numpy(data.mean().values[:, None]).double()),
    }
def max_sharpe_ratio_strat(df_oneyear_return: pd.DataFrame,
                           risk_free_rate=default_risk_free_rate):
    """Returns (annualized max Sharpe ratio, optimal weights)."""
    df_1_mean_return = df_oneyear_return.mean()  # daily mean return over the one year
    df_1_cov = df_oneyear_return.cov()
    assets_num = len(df_oneyear_return.columns)
    bounds = tuple((0.0, 1.0) for i in range(assets_num))
    guess = np.array(assets_num * [1 / assets_num])

    def negative_sharpe(w, df_1_mean_return, df_1_cov, risk_free_rate):
        vol = np.sqrt(reduce(np.dot, [w.T, df_1_cov, w])) * np.sqrt(252)
        ret = np.dot(w, df_1_mean_return) * 252
        return -(ret - risk_free_rate) / vol

    args = (df_1_mean_return, df_1_cov, risk_free_rate)
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    min_neg_sharpe = solver.minimize(fun=negative_sharpe,
                                     x0=guess,
                                     args=args,
                                     constraints=constraints,
                                     bounds=bounds,
                                     method='SLSQP')
    return -min_neg_sharpe.fun, min_neg_sharpe.x
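# A hedged usage sketch for max_sharpe_ratio_strat: the daily-returns frame
# below is synthetic, and the 252 / sqrt(252) factors inside the objective
# assume daily data.  This presumes the snippet's own imports are in place
# (scipy.optimize as solver, functools.reduce, default_risk_free_rate).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df_oneyear_return = pd.DataFrame(rng.normal(0.0005, 0.01, size=(252, 3)),
                                 columns=["A", "B", "C"])

sharpe, weights = max_sharpe_ratio_strat(df_oneyear_return)
print("max Sharpe:", round(sharpe, 3))
print("weights:", weights.round(3))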
def construct_portfolio(self):
    """
    :return:
    """
    pre_date_data = w.tdaysoffset(-self.window, self.date, "Period=M")
    pre_date = pre_date_data.Data[0][0].strftime("%Y-%m-%d")
    tradedays_data = w.tdays(pre_date, self.date, "Period=M")
    tradedayslist = tradedays_data[0]
    tradedays = [td.strftime("%Y-%m-%d") for td in tradedayslist]
    # extract factor data
    style_return = DataFrame()
    for f in self.factors:
        f_data = []
        for dt in tradedays:
            stockcodes = StockPool(dt).select_stock()
            f_data = f(dt, stockcodes).getdata()
            f_ret = FactorProcess.get_alpha(stockcodes, dt, -1)  # take one month of alpha
            df = DataFrame(data=[f_data, f_ret], columns=[f.windLabel, 'ret'])
            long_only, long_short = FactorStyle.compute_style_return_month(
                df, f.windLabel)
            f_data.append(long_only)
        style_return[f.windLabel] = f_data
    S = matrix(style_return.cov().values)
    pbar = matrix(np.zeros_like(style_return.std().values))
    n = len(self.factors)
    G = matrix(0.0, (n, n))
    G[::n + 1] = -1.0
    h = matrix(0.0, (n, 1))
    A = matrix(1.0, (1, n))
    b = matrix(1.0)
    portfolio_weight = qp(S, -pbar, G, h, A, b)['x']
def test_cov(self, float_frame, float_string_frame):
    # min_periods no NAs (corner case)
    expected = float_frame.cov()
    result = float_frame.cov(min_periods=len(float_frame))
    tm.assert_frame_equal(expected, result)

    result = float_frame.cov(min_periods=len(float_frame) + 1)
    assert isna(result.values).all()

    # with NAs
    frame = float_frame.copy()
    frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
    frame.iloc[5:10, frame.columns.get_loc("B")] = np.nan
    result = frame.cov(min_periods=len(frame) - 8)
    expected = frame.cov()
    expected.loc["A", "B"] = np.nan
    expected.loc["B", "A"] = np.nan
    tm.assert_frame_equal(result, expected)

    # regular
    result = frame.cov()
    expected = frame["A"].cov(frame["C"])
    tm.assert_almost_equal(result["A"]["C"], expected)

    # exclude non-numeric types
    with tm.assert_produces_warning(
            FutureWarning, match="The default value of numeric_only"):
        result = float_string_frame.cov()
    expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
    tm.assert_frame_equal(result, expected)

    # Single column frame
    df = DataFrame(np.linspace(0.0, 1.0, 10))
    result = df.cov()
    expected = DataFrame(np.cov(df.values.T).reshape((1, 1)),
                         index=df.columns, columns=df.columns)
    tm.assert_frame_equal(result, expected)

    df.loc[0] = np.nan
    result = df.cov()
    expected = DataFrame(
        np.cov(df.values[1:].T).reshape((1, 1)),
        index=df.columns,
        columns=df.columns,
    )
    tm.assert_frame_equal(result, expected)
def test_cov(self, float_frame, float_string_frame):
    # min_periods no NAs (corner case)
    expected = float_frame.cov()
    result = float_frame.cov(min_periods=len(float_frame))
    tm.assert_frame_equal(expected, result)

    result = float_frame.cov(min_periods=len(float_frame) + 1)
    assert isna(result.values).all()

    # with NAs
    frame = float_frame.copy()
    frame["A"][:5] = np.nan
    frame["B"][5:10] = np.nan
    result = float_frame.cov(min_periods=len(float_frame) - 8)
    expected = float_frame.cov()
    expected.loc["A", "B"] = np.nan
    expected.loc["B", "A"] = np.nan

    # regular
    float_frame["A"][:5] = np.nan
    float_frame["B"][:10] = np.nan
    cov = float_frame.cov()
    tm.assert_almost_equal(cov["A"]["C"],
                           float_frame["A"].cov(float_frame["C"]))

    # exclude non-numeric types
    result = float_string_frame.cov()
    expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
    tm.assert_frame_equal(result, expected)

    # Single column frame
    df = DataFrame(np.linspace(0.0, 1.0, 10))
    result = df.cov()
    expected = DataFrame(np.cov(df.values.T).reshape((1, 1)),
                         index=df.columns, columns=df.columns)
    tm.assert_frame_equal(result, expected)

    df.loc[0] = np.nan
    result = df.cov()
    expected = DataFrame(
        np.cov(df.values[1:].T).reshape((1, 1)),
        index=df.columns,
        columns=df.columns,
    )
    tm.assert_frame_equal(result, expected)
def gmv(returns: pd.DataFrame):
    """
    Returns the weights of the Global Minimum Volatility portfolio,
    computed from the covariance matrix of the given returns
    """
    cov = returns.cov()
    n = cov.shape[0]
    return msr(0, np.repeat(1, n), cov)
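# gmv reuses a max-Sharpe routine (msr) with identical expected returns:
# when every asset has the same expected return, maximizing the Sharpe
# ratio reduces to minimizing variance.  The unconstrained closed form is
# w = inv(S) 1 / (1' inv(S) 1); a sketch assuming numpy, ignoring any
# long-only constraints msr may impose:
import numpy as np

def gmv_closed_form(cov: np.ndarray) -> np.ndarray:
    """Global minimum variance weights w = inv(cov) @ 1, rescaled to sum to 1."""
    ones = np.ones(cov.shape[0])
    w = np.linalg.solve(cov, ones)  # inv(cov) @ ones without an explicit inverse
    return w / w.sum()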
def test_cov_ddof(self, test_ddof):
    # GH#34611
    np_array1 = np.random.rand(10)
    np_array2 = np.random.rand(10)
    df = DataFrame({0: np_array1, 1: np_array2})
    result = df.cov(ddof=test_ddof)
    expected_np = np.cov(np_array1, np_array2, ddof=test_ddof)
    expected = DataFrame(expected_np)
    tm.assert_frame_equal(result, expected)
class CalWeight:
    def __init__(self, step, risk_aversion):
        self.risk_aversion = risk_aversion
        if step == 0:
            self.start = '2012-01-01'
            self.end = '2014-12-31'
        elif step == 1:
            self.start = '2012-01-01'
            self.end = '2015-2-28'
        elif step == 2:
            self.start = '2012-01-01'
            self.end = '2015-4-30'
        secIDs = [
            '000300.ZICN', '000905.ZICN', '399006.ZICN',
            'SPX.ZIUS', '000012.ZICN', '000013.ZICN'
        ]
        self.rtn_table = DataFrame()
        for secID in secIDs:
            cp = self.get_return(secID)
            cp.name = secID
            self.rtn_table = pd.concat([self.rtn_table, cp], axis=1)
        self.rtn_table.fillna(0, inplace=True)
        self.cov_mat = self.rtn_table.cov() * 250
        self.exp_rtn = self.rtn_table.mean() * 250

    def get_return(self, ticker):
        tmp_lst = []
        fname = PERFIX + 'data_' + ticker + '.csv'
        with open(fname, 'r') as f:
            reader = csv.reader(f)
            for row in reader:
                tmp_lst.append(row)
        df = pd.DataFrame(tmp_lst[1:], columns=tmp_lst[0])
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index("Date")
        df = df[self.start:self.end]
        temp = df['Close'].astype('float64').pct_change().fillna(0.)
        return temp

    def get_weight(self):
        risk_aversion = self.risk_aversion
        P = risk_aversion * matrix(self.cov_mat.values)
        q = -1 * matrix(self.exp_rtn.values)
        G = matrix(
            np.vstack((np.diag(np.ones(len(self.exp_rtn))),
                       np.diag(-np.ones(len(self.exp_rtn))))))
        h = matrix(
            np.array([np.ones(len(self.exp_rtn)),
                      np.zeros(len(self.exp_rtn))]).reshape(
                          len(self.exp_rtn) * 2, 1))
        A = matrix(np.ones(len(self.exp_rtn)), (1, len(self.exp_rtn)))
        b = matrix([1.0])
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        return DataFrame(index=self.exp_rtn.index,
                         data=np.round(sol['x'], 2),
                         columns=['weight'])  # weights rounded to two decimal places
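# get_weight above feeds cvxopt's qp, which solves
#     min 0.5 x' P x + q' x   s.t.  G x <= h,  A x = b.
# With P = risk_aversion * Sigma and q = -mu this is mean-variance utility
# maximization under full-investment and long-only constraints.  Without
# those constraints the optimum has a closed form, useful as a sanity
# check (a sketch assuming numpy, not part of the class above):
import numpy as np

def unconstrained_mv_weights(cov, exp_rtn, risk_aversion):
    """w* = (1 / risk_aversion) * inv(Sigma) @ mu, the maximizer of
    mu'w - (risk_aversion / 2) w' Sigma w with no budget or sign constraints."""
    return np.linalg.solve(risk_aversion * cov, exp_rtn)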
def get_portfolio_risk(cls, weights: list, ror: pd.DataFrame) -> float:
    """
    Computes the std of portfolio returns.
    """
    # cls.weights_sum_is_one(weights)
    if isinstance(ror, pd.Series):
        # required for a single asset portfolio
        return ror.std()
    weights = np.array(weights)
    covmat = ror.cov()
    return math.sqrt(weights.T @ covmat @ weights)
def myconnect(self):
    user_text1 = str(self.host_le.text())
    user_text2 = str(self.user_le.text())
    user_text3 = str(self.password_le.text())
    user_text4 = str(self.db_le.text())
    try:
        mcon = MySQLdb.connect(host=user_text1, user=user_text2,
                               passwd=user_text3, db=user_text4)
        self.browser.setText("[*] Welcome, connection successful.")
        text, ok = QInputDialog.getText(self, "Table Name",
                                        "Enter table name:")
        if ok and text:
            tb_name = str(text)
            try:
                sq_tb = pis.read_sql('select * from ' + ' %s ' % tb_name, mcon)
                df = DataFrame(sq_tb)
                mcon.close()
                size = str(len(df))
                stat_description = df.describe()
                stats = str(stat_description)
                kt = str(df.kurt())
                skew = str(df.skew())
                cov = str(df.cov())
                corr = str(df.corr())
                head = str(df.head())
                tail = str(df.tail())
                summation = str(stat_description.sum())
                self.browser1.setText("Size: " + "%s " % size + "\n"
                                      + "Statistics:" + "\n"
                                      + " %s " % stats + "\n"
                                      + "Kurt:" + "\n"
                                      + "%s" % kt + "\n"
                                      + "Skew:" + "\n"
                                      + "%s" % skew + "\n"
                                      + "Covariance:" + "\n"
                                      + "%s" % cov + "\n"
                                      + "Correlation:" + "\n"
                                      + "%s" % corr + "\n"
                                      + "Summation:" + "\n"
                                      + "%s" % summation + "\n"
                                      + "Head:" + "\n"
                                      + "%s" % head + "\n"
                                      + "Tail:" + "\n"
                                      + "%s" % tail)
                self.browser.setText(stats)
                self.host_le.clear()
                self.user_le.clear()
                self.password_le.clear()
                self.db_le.clear()
            except Exception, e:
                self.browser.setText("[*] Ensure that the table name is "
                                     "correct and try again.")
    except Exception, e:
        self.browser.setText("Please specify correct connection details "
                             "and try again")
def epochs_sim_agg_returns_cov_market_data(returns: pd.DataFrame) -> pd.Series:
    """Computes the aggregated distribution of returns for a market.

    :param returns: dataframe with the simulated returns.
    :type returns: pd.DataFrame
    """
    function_name: str = epochs_sim_agg_returns_cov_market_data.__name__
    epochs_sim_tools.function_header_print_data(
        function_name, [""], "", "", "", sim=True)

    print("Size of time series and number of companies: ", returns.shape)

    cov: pd.DataFrame = returns.cov()
    # eig_vec: eigenvectors, eig_val: eigenvalues
    eig_val, eig_vec = np.linalg.eigh(cov)

    # rot: rotation, scale: scaling
    rot, scale = eig_vec, np.diag(1 / np.sqrt(eig_val))
    # trans: transformation matrix
    # trans = rot . scale
    trans = rot.dot(scale)

    trans_returns: pd.DataFrame = returns.dot(trans)
    trans_returns.columns = returns.columns

    one_col: List[pd.Series] = []
    for col in trans_returns.columns:
        one_col.append(trans_returns[col])

    agg_returns: pd.Series = pd.concat(one_col, ignore_index=True)

    # remove NaN and Inf
    agg_returns_list: List[float] = [
        x for x in agg_returns if not math.isnan(x) and not math.isinf(x)
    ]
    # filter out values greater than 10 or smaller than -10
    agg_returns_list = [x for x in agg_returns_list if -10 <= x <= 10]

    agg_returns_series: pd.Series = pd.Series(agg_returns_list)
    print(f"mean = {agg_returns_series.mean()}")
    print(f"std = {agg_returns_series.std()}")

    del returns
    del trans_returns
    del one_col

    return agg_returns_series
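# The rotation-and-scaling step above is a whitening transform: projecting
# returns onto the eigenvectors of the covariance and dividing by the square
# root of each eigenvalue yields series whose sample covariance is (up to
# numerical error) the identity, which is what justifies pooling them into
# one aggregated distribution.  A quick check of that property on synthetic
# data, assuming numpy and pandas:
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
returns = pd.DataFrame(rng.normal(size=(1000, 4)) @ np.diag([1.0, 2.0, 3.0, 4.0]))

eig_val, eig_vec = np.linalg.eigh(returns.cov())
trans = eig_vec @ np.diag(1 / np.sqrt(eig_val))
whitened = returns @ trans

print(np.round(whitened.cov().values, 6))  # ~ identity matrix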
def pca(df: DataFrame, file_path: str,
        eigenvalues_condition: Callable[[float], bool]):
    """
    Transforms a dataset into one with fewer dimensions via PCA and saves
    the result to a csv file.  Implementation based on the paper
    'A tutorial on Principal Components Analysis' by Lindsay I Smith.

    :param df: dataset with only numeric attributes and without the target attribute
    :param file_path: relative path to the csv file where the result will be saved
    :param eigenvalues_condition: boolean function used to filter the eigenvalues
        (and with them the associated eigenvectors) that will be used to build
        the row_feature_vector matrix (see the paper).
    """
    # the first step is skipped, assuming the data meets the preconditions
    # second step: subtract the means
    row_data_adjust = DataFrame()
    means = []
    for a in df.columns.values:
        means.append(df[a].mean())
    for (i, a) in enumerate(df.columns.values):
        row_data_adjust[a] = df[a] - means[i]
    # third step: compute the covariance matrix
    C = row_data_adjust.cov()
    # fourth step: compute eigenvalues and eigenvectors of the covariance matrix
    U, Sigma, V = randomized_svd(C.as_matrix(),
                                 n_components=C.shape[0],
                                 n_iter=5,
                                 random_state=None)
    # fifth step: choose components to form the feature vector
    order = (-Sigma).argsort()
    Sigma = Sigma[order]
    U = U[:, order]
    filtered_indices = [
        i for i in range(len(Sigma)) if eigenvalues_condition(Sigma[i])
    ]
    row_feature_vector = U[:, filtered_indices].transpose()
    # sixth step: derive the new dataset
    row_data_adjust = row_data_adjust.as_matrix().transpose()
    # noinspection PyUnresolvedReferences
    final_data = np.matmul(row_feature_vector, row_data_adjust)
    final_data = final_data.transpose()
    # save to a csv
    final_data = DataFrame(final_data)
    final_data.to_csv(file_path, index=False, encoding='utf-8')
def annual_covar(excess_returns: pd.DataFrame) -> pd.DataFrame:
    """annualized covariance of excess returns"""
    ann_covar = excess_returns.cov() * 252
    print('Condition number of annualized covariance matrix is:',
          np.linalg.cond(ann_covar))
    try:
        eigvals, __ = np.linalg.eig(ann_covar)
    except:
        print('Error in Eigen decomposition of covariance matrix')
        eigvals = []
        sys.exit(-1)
    if min(eigvals) <= 0:
        print('Error! Negative eigenvalues in covariance matrix detected!')
        sys.exit(-1)
    return ann_covar
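# One note on the decomposition above: for a symmetric matrix such as a
# covariance, np.linalg.eigvalsh is the more natural call -- it exploits
# symmetry and returns real, ascending eigenvalues, so the positive-
# definiteness check collapses to a one-liner (a sketch, not the snippet's
# own code):
import numpy as np

def is_positive_definite(cov: np.ndarray, tol: float = 0.0) -> bool:
    """True if the smallest eigenvalue of the symmetric matrix exceeds tol."""
    return bool(np.linalg.eigvalsh(cov)[0] > tol)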
def calc_kelly_leverages(securities: Set[str],
                         start_date: date,
                         end_date: date,
                         risk_free_rate: float = 0.04) -> Dict[str, float]:
    """Calculates the optimal leverages for the given securities and
    time frame. Returns a dict mapping each security to its calculated
    optimal leverage.

    Note: risk_free_rate is annualized
    """
    f = {}
    ret = {}
    excess_return = {}

    # Download the historical prices from Yahoo Finance and calculate the
    # excess return (return of security - risk free rate) for each security.
    for symbol in securities:
        try:
            hist_prices = get_historical_data(symbol, start=start_date,
                                              end=end_date,
                                              output_format='pandas')
        except IOError as e:
            raise ValueError(f'Unable to download data for {symbol}. '
                             f'Reason: {str(e)}')

        f[symbol] = hist_prices
        ret[symbol] = hist_prices['close'].pct_change()
        # risk_free_rate is annualized
        excess_return[symbol] = (ret[symbol] - (risk_free_rate / 252))

    # Create a new DataFrame based on the Excess Returns.
    df = DataFrame(excess_return).dropna()

    # Calculate the Covariance and Mean of the DataFrame
    C = 252 * df.cov()
    M = 252 * df.mean()

    # Calculate the Kelly-Optimal Leverages using Matrix Multiplication
    F = inv(C).dot(M)

    # Return a dict of security -> leverage
    return {
        security: leverage
        for security, leverage in zip(df.columns.values.tolist(), F)
    }
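# The matrix step above is the multi-asset Kelly result: for excess returns
# with annualized mean vector M and covariance C, the optimal leverage
# vector is F = inv(C) @ M.  A tiny two-asset check with illustrative
# numbers, assuming numpy:
import numpy as np

C = np.array([[0.04, 0.01],
              [0.01, 0.09]])  # annualized covariance of excess returns
M = np.array([0.05, 0.08])    # annualized mean excess returns

F = np.linalg.solve(C, M)     # same result as inv(C) @ M, better conditioned
print(F.round(3))             # Kelly-optimal leverage per asset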
def gen_pos_returns(weights: [float], returns: pd.DataFrame, type: str):
    if type == 'parametric':
        w = np.array(weights)
        mean = returns.mean().dot(weights)
        # parametric calculations
        std = np.sqrt(w.T.dot(returns.cov()).dot(weights))
        return 1 - norm.cdf(0, mean, std)
    elif type == 'historical':
        returns_weighted = (returns.dropna()) * weights
        total_returns = returns_weighted.apply(np.sum, axis=1).sort_values()
        array = np.asarray(total_returns)
        idx = (np.abs(array)).argmin()  # index of the return closest to 0%
        prob = 1 - idx / len(array)  # 1 - probability the return is less than 0%
        return prob
    else:
        return 'error'
def generate_single_case(returns_df: pd.DataFrame, walk_length: int):
    assets_counts = len(returns_df.columns)
    cov = returns_df.cov()
    returns_mean = returns_df.mean()
    returns_std = returns_df.std()
    z_list = []
    for asset in returns_df.columns:
        mean = returns_mean[asset]
        std = returns_std[asset]
        standardized_returns = (returns_df[asset] - mean) / std
        z = np.random.choice(standardized_returns, walk_length)
        z_list.append([z])
    Z = np.concatenate(z_list)
    L = np.linalg.cholesky(cov)
    future_returns = np.full(
        (walk_length, assets_counts), returns_mean).T + np.dot(L, Z)
    return pd.DataFrame(future_returns.T, columns=returns_df.columns)
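# The Cholesky step above is the standard recipe for imposing a target
# covariance on standardized draws: if Z has identity covariance, then L @ Z
# has covariance L @ L' = Sigma.  A minimal sketch with Gaussian draws,
# assuming numpy (generate_single_case instead resamples standardized
# historical returns for Z):
import numpy as np

def correlated_normals(cov: np.ndarray, n_draws: int, seed=None) -> np.ndarray:
    """(n_assets, n_draws) samples with covariance `cov`, via X = L @ Z."""
    rng = np.random.default_rng(seed)
    L = np.linalg.cholesky(cov)
    Z = rng.standard_normal((cov.shape[0], n_draws))
    return L @ Z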
def define_matrix_space(features: int, data: pd.DataFrame) -> pd.DataFrame:
    # Keep original data for later use. Copy values to apply normalization
    # data_norm = data_normalizer.stretch_to_unary(data)
    data_norm = normalizer.Map(lambda col: (col - col.mean()) / col.std(), data)
    # Eigenvectors for the covariance matrix
    np_vec = np.linalg.eig(data.cov())[1]
    # Dividing row-wise to normalize vectors
    np_matrix = np_vec / np_vec[0][:, None][::-1]
    # select the number of features desired after transformation
    np_matrix = np_matrix[:features].T
    # Rotating data points with respect to the eigenvectors
    rotated_points = np.matmul(data_norm, np_matrix)
    return pd.DataFrame(rotated_points, columns=data.columns[:features])
def calc_kelly_leverages(securities, start_date, end_date,
                         risk_free_rate=0.04):
    """Calculates the optimal leverages for the given securities and
    time frame. Returns a list of (security, leverage) tuples with the
    calculated optimal leverages.

    Note: risk_free_rate is annualized
    """
    f = {}
    ret = {}
    excess_return = {}

    # Download the historical prices from Yahoo Finance and calculate the
    # excess return (return of security - risk free rate) for each security.
    for symbol in securities:
        try:
            hist_prices = web.DataReader(symbol, 'yahoo', start_date, end_date)
        except IOError as e:
            print('Unable to download data for %s. Reason: %s'
                  % (symbol, str(e)))
            return None

        f[symbol] = hist_prices
        ret[symbol] = hist_prices['Adj Close'].pct_change()
        # risk_free_rate is annualized
        excess_return[symbol] = (ret[symbol] - (risk_free_rate / 252))

    # Create a new DataFrame based on the Excess Returns.
    df = DataFrame(excess_return).dropna()

    # Calculate the Covariance and Mean of the DataFrame
    C = 252 * df.cov()
    M = 252 * df.mean()

    # Calculate the Kelly-Optimal Leverages using Matrix Multiplication
    F = inv(C).dot(M)

    # Return a list of (security, leverage) tuples
    return zip(df.columns.values.tolist(), F)
def gen_cond_var(weights: [float], returns: pd.DataFrame, conf_level: float):
    # VaR from historical total returns
    # indx = round((1-conf_level)*len(data.index))  # Percentile calculation
    # return data.iloc[indx]

    ### CVaR parametric ###
    w = np.array(weights)
    mean = returns.mean().dot(weights)
    # parametric calculations
    std = np.sqrt(w.T.dot(returns.cov()).dot(weights))
    # there's a (1-conf_level)% probability that we lose at least
    # var percent of our total portfolio value in a day
    var = norm.ppf((1 - conf_level), mean, std)
    # equation for the expected value given var of a normal distribution
    # with known mean and std
    cvar = (.5 * mean * erf((var - mean) / (math.sqrt(2) * std))
            - std * math.exp(-(var - mean) ** 2 / (2 * std ** 2))
            / math.sqrt(2 * math.pi)
            + .5 * mean)
    return cvar
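# The erf expression above encodes the lower tail of a normal distribution.
# A related, more common closed form for the expected shortfall of
# X ~ N(mean, std) at confidence conf_level is
#     ES = mean - std * pdf(z_alpha) / alpha,   alpha = 1 - conf_level,
# i.e. the conditional expectation E[X | X <= VaR].  A cross-check sketch,
# assuming scipy:
from scipy.stats import norm

def normal_cvar(mean: float, std: float, conf_level: float) -> float:
    """E[X | X <= VaR_alpha] for X ~ N(mean, std), with alpha = 1 - conf_level."""
    alpha = 1 - conf_level
    z = norm.ppf(alpha)
    return mean - std * norm.pdf(z) / alpha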
def get_pca(features_standard: pd.DataFrame,
            var_threshold: float = 0.95) -> Tuple[pd.Series, pd.DataFrame]:
    """ get principal components """
    # get our eigenvalues and sort in order of size
    eig_val, eig_vec = np.linalg.eigh(features_standard.cov())
    idx = eig_val.argsort()[::-1]
    eig_val, eig_vec = eig_val[idx], eig_vec[:, idx]

    # wrap as pandas objects
    eig_val = pd.Series(eig_val,
                        index=[f"pc_{1 + i}" for i in range(eig_val.shape[0])],
                        name="eig_vals")
    eig_vec = pd.DataFrame(eig_vec, index=features_standard.columns,
                           columns=eig_val.index)
    eig_vec = eig_vec.loc[:, eig_val.index]

    # reduce dimensions by getting rid of small eigenvalues
    cumulative_var = eig_val.cumsum() / eig_val.sum()
    cutoff = cumulative_var.values.searchsorted(var_threshold)
    eig_val, eig_vec = eig_val.iloc[:cutoff + 1], eig_vec.iloc[:, :cutoff + 1]
    return eig_val, eig_vec
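# A hedged usage example for get_pca: the standardized feature frame below
# is synthetic, and var_threshold=0.95 keeps just enough components to
# explain 95% of the variance.
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
features = pd.DataFrame(rng.normal(size=(500, 6)),
                        columns=[f"f{i}" for i in range(6)])
features_standard = (features - features.mean()) / features.std()

eig_val, eig_vec = get_pca(features_standard, var_threshold=0.95)
print(eig_val)        # retained eigenvalues, largest first
print(eig_vec.shape)  # (n_features, n_retained_components)
projected = features_standard @ eig_vec  # data in the reduced basis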
def min_vol_strat(df_oneyear_return: pd.DataFrame):
    """Returns (target annual return, minimum volatility, optimal weights)."""
    df_1_mean_return = df_oneyear_return.mean()  # daily mean return over the one year
    df_1_cov = df_oneyear_return.cov()
    assets_num = len(df_oneyear_return.columns)
    bounds = tuple((0.0, 1.0) for i in range(assets_num))
    guess = np.array(assets_num * [1 / assets_num])
    target_return = np.linspace(
        max(df_1_mean_return) * 252, min(df_1_mean_return) * 252, 50)

    def vol(w):
        return np.sqrt(reduce(np.dot, [w.T, df_1_cov, w])) * np.sqrt(252)

    min_vol_result = float('inf')
    min_vol_weight = []
    min_vol_return = []
    for i in target_return:
        constraints = [{
            'type': 'eq',
            'fun': lambda x: sum(x) - 1
        }, {
            'type': 'eq',
            'fun': lambda x: sum(x * df_1_mean_return) * 252 - i
        }]
        min_vol = solver.minimize(fun=vol,
                                  x0=guess,
                                  constraints=constraints,
                                  bounds=bounds,
                                  method='SLSQP')
        if min_vol.fun < min_vol_result:
            min_vol_result = min_vol.fun
            min_vol_weight = min_vol.x
            min_vol_return = i
        else:
            break
    return min_vol_return, min_vol_result, min_vol_weight
def construct_portfolio(self):
    """
    :return:
    """
    pre_date_data = w.tdaysoffset(-self.window, self.date, "Period=M")
    pre_date = pre_date_data.Data[0][0].strftime("%Y-%m-%d")
    tradedays_data = w.tdays(pre_date, self.date, "Period=M")
    tradedayslist = tradedays_data[0]
    tradedays = [td.strftime("%Y-%m-%d") for td in tradedayslist]
    # extract factor data
    style_return = DataFrame()
    for f in self.factors:
        f_data = []
        for dt in tradedays:
            stockcodes = StockPool(dt).select_stock()
            f_data = f(dt, stockcodes).getdata()
            f_ret = FactorProcess.get_alpha(stockcodes, dt, -1)  # take one month of alpha
            df = DataFrame(data=[f_data, f_ret], columns=[f.windLabel, 'ret'])
            long_only, long_short = FactorStyle.compute_style_return_month(
                df, f.windLabel)
            f_data.append(long_only)
        style_return[f.windLabel] = f_data
    S = style_return.cov().values
    n = len(self.factors)
    Sigma = S.T.dot(S)
    weight = Variable(n)
    gamma = Parameter(sign="positive")
    risk = quad_form(weight, Sigma)
    prob = Problem(Maximize(-gamma * risk),
                   [sum_entries(weight) == 1, weight >= 0])
    gamma.value = 1
    prob.solve()
    Weight = weight.value / np.sum(weight.value)
    return Weight
def get_mode(arr):
    mode = []
    arr_appear = dict((a, arr.count(a)) for a in arr)  # count occurrences of each element
    if max(arr_appear.values()) == 1:  # if the highest count is 1
        return  # there is no mode
    else:
        for k, v in arr_appear.items():  # otherwise, the most frequent values are the mode
            if v == max(arr_appear.values()):
                mode.append(k)
    return mode

get_mode(a)
var(a)
std(a)
a = Series(a)
a.skew()
a.kurt()
a.describe()

df = DataFrame({'data1': np.random.randn(5), 'data2': np.random.randn(5)})
df.cov()
df.corr()

### Hypothesis testing
from scipy import stats as ss
df = DataFrame({'data': [10.1, 10, 9.8, 10.5, 9.7, 10.1, 9.9, 10.2, 10.3, 9.9]})
ss.ttest_1samp(a=df, popmean=10)
def test_corr_int(self):
    # dtypes other than float64 GH#1761
    df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})
    df3.cov()
    df3.corr()
    3  3.5  0.5 ]

calculate var, cov, corr, cov-matrix, corr-matrix
"""
data = {'x1': [1, 2], 'x2': [2, 3], 'x3': [2, 1]}
A = DataFrame(data)
print A

data1 = Series([3, 3.5, 0.5], index=['x1', 'x2', 'x3'])
A = A.append(data1, ignore_index=True)
print '\n', A, '\n'

var_x1 = A['x1'].var()
var_x2 = A['x2'].var()
var_x3 = A['x3'].var()
print 'var_x1: %f' % var_x1
print 'var_x2: %f' % var_x2
print 'var_x3: %f' % var_x3

cov_matrix = A.cov()
print '\ncov_matrix:\n', cov_matrix

corr_matrix = A.corr()
print '\ncorr_matrix:\n', corr_matrix

print "\nA['x1'].corr(A['x2']): %f" % A['x1'].corr(A['x2'])
print "A['x1'].corr(A['x3']): %f" % A['x1'].corr(A['x3'])
}, index=list('abcd'))
frame.ix['f'] = np.random.randn(4)
frame['loc'] = ['ST', 'MO'] * 3
frame.sort_index(axis=1)
frame.sort_values(by=['loc', 'STL'])
frame.rank(axis=0)
frame.rank(method='max')
um.order()
um.rank()
frame.add(frame2)
frame.corr(um)
frame.fillna(1, inplace=True)
um = frame['UM']
frame.corr()
frame.cov()
frame2.ix['f'] = np.random.randn(3)
frame.corrwith(frame2)
frame.corrwith(um)
frame.corrwith(um.to_frame())
frame.ix[:, 'Washu':'UMST'].apply(lambda x: x.mean())
frame.set_index('UM', drop=True, inplace=True)
keys = frame.index
frame.reset_index(level=keys)
df = DataFrame(np.random.randn(6, 5),
               columns=['Ohio', 'Dallas', 'Michigan', 'Miami', 'DC'],
               index=[['a', 'a', 'b', 'b', 'c', 'd'], [1, 2, 3, 1, 2, 3]])
df.index
df.ix['a']
df.sortlevel(level=0, axis=0)
        try:
            hist_prices = web.DataReader(symbol, 'yahoo', start_date, end_date)
        except IOError, e:
            print 'Unable to download data for %s. Reason: %s' % (symbol, str(e))
            return None

        f[symbol] = hist_prices
        ret[symbol] = hist_prices['Adj Close'].pct_change()
        # risk_free_rate is annualized
        excess_return[symbol] = (ret[symbol] - (risk_free_rate / 252))

    # Create a new DataFrame based on the Excess Returns.
    df = DataFrame(excess_return).dropna()

    # Calculate the Covariance and Mean of the DataFrame
    C = 252 * df.cov()
    M = 252 * df.mean()

    # Calculate the Kelly-Optimal Leverages using Matrix Multiplication
    F = inv(C).dot(M)

    # Return a list of (security, leverage) tuples
    return zip(df.columns.values.tolist(), F)


def main():
    """Entry point of Kelly Criterion calculation."""
    print "Kelly Criterion calculation"
    args = docopt(__doc__, sys.argv[1:])
for i in range(len(exchange_rates) - 1):
    row = {}
    for tp, cur in zip(metadata['Time Period'], metadata['Currency:']):
        x1 = float(exchange_rates[tp][i])
        x2 = float(exchange_rates[tp][i + 1])
        if cur == 'USD':
            x1 = 1.0 / x1
            x2 = 1.0 / x2
        # Returns are in units of %.
        row[tp] = 100 * (x1 - x2) / x2
    rows.append(row)

returns = DataFrame(data=rows, columns=list(metadata['Time Period']))
returns_cov = returns.cov()
# Means are the expected returns for each currency.
exp_returns = concat({'mean': returns.mean(), 'variance': returns.var()},
                     axis=1)


class CurrencyPortfolio(YhatModel):
    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        P = matrix(data['risk_aversion'] * returns_cov.as_matrix())
        q = matrix(-exp_returns['mean'].as_matrix())
        G = matrix(0.0, (len(q), len(q)))
        G[::len(q) + 1] = -1.0
        h = matrix(0.0, (len(q), 1))
        A = matrix(1.0, (1, len(q)))
        b = matrix(1.0)
pd.Index
obj = Series([1, 2, 3])
obj.reindex()
data = DataFrame([[1, 2, 3], [4, 5, 6]])
data.drop()
np.argsort()
obj.rank()
obj.sort_values()
data.tail()
data.cov()
data.corr()
data.dropna()
data.loc
data.fillna()
data.unstack()
def _calc_covariance_matrix(ts_df: DataFrame, frequency: int = 252) -> DataFrame:
    return ts_df.cov() * frequency
# rtn_table.head(5)
# rtn_table.mean()*250
# rtn_table.corr()
print(rtn_table.mean() * 250)
print(rtn_table.corr())
print("*************************************************")

from cvxopt import matrix, solvers

portfolio1 = [0, 1, 2, 4, 5]
portfolio2 = range(6)
cov_mat = rtn_table.cov() * 250
exp_rtn = rtn_table.mean() * 250


def cal_efficient_frontier(portfolio):
    if len(portfolio) <= 2 or len(portfolio) > 6:
        raise Exception('portfolio must be a list with length greater than 2 '
                        'and less than 7!')
    # print(cov_mat)
    cov_mat1 = cov_mat.iloc[portfolio, portfolio]
    exp_rtn1 = exp_rtn.iloc[portfolio]
    max_rtn = max(exp_rtn1)
    min_rtn = min(exp_rtn1)
    risks = []
    returns = []
print(df5)

import math

def int_float_squares(series):
    return pd.Series({"int_sq": series["int_col"] ** 2,
                      "flt_sq": series["float_col"] ** 2})

print(df.apply(int_float_squares, axis=1))

### 7. Basic Stats ###
print(df.describe())
print(df.cov())
print(df.corr())

### 8. Merge and Join ###
print(df)
other = DataFrame({"str_col": ["a", "b"], "some_val": [1, 2]})
print(other)
print(pd.merge(df, other, on="str_col", how="inner"))
print(pd.merge(df, other, on="str_col", how="outer"))
print(pd.merge(df, other, on="str_col", how="left"))
print(pd.merge(df, other, on="str_col", how="right"))

### 9. Plot ###
plot_df = DataFrame(np.random.randn(1000, 2), columns=["x", "y"])
print "*"*15 print "Definimos de nuevo el dataframe" df = pd.DataFrame(data={"A":[1,2], "B":[2.6,1.3]}) print df print "añadimos columnas combinando las actuales" df["C"] = df["A"]+df["B"] df["D"] = df["A"]*3 df["E"] = np.sqrt(df["A"]) print df print "*"*15 print "Datos disponibles de un dataframe" print " descripcion del dataframe" print df.describe() print " covarianza " print df.cov() print " correlación " print df.corr() print "*"*15 print " Creamos otro dataframe con valores aleatorios (1000 filas y 2 columnas " print " DataFrame(np.random.randn(1000,2),columns=['x','y'])" plot_df = DataFrame(np.random.randn(1000,2),columns=['x','y']) print plot_df print "Mostramos las graficas" plot_df.plot() plot_df.hist()