def cov(self,
        cols=False,
        max_periods=False,
        decay=False,
        shrink=False,
        AR=False):
    if cols:
        if not isinstance(cols, list):
            cols = [cols]
        X = DataFrame(self[cols])
    else:
        X = DataFrame(self)
        cols = list(self.columns)
    if max_periods:
        X = X[-max_periods:]
    if AR:
        # remove a fitted AR(1) component from each column before estimating
        R = DataFrame(index=self.index, columns=cols)
        for col in cols:
            A = X[col]
            m = tsa.AR(array(A))
            f = m.fit(1)
            p = f.params
            R[col] = A - p[0] - p[1] * A.shift(1)
        R = R[1:]
        if decay:
            if (decay <= 0) or (decay >= 1):
                print('Warning: The decay parameter is not between 0 and 1.')
            n = R.shape[0]
            vec = array(R[0:1])
            cov = vec.T.dot(vec)
            for i in arange(1, n):
                vec = array(R[i:i + 1])
                cov = decay * cov + (1 - decay) * vec.T.dot(vec)
            cov = DataFrame(cov, index=cols, columns=cols)
        else:
            cov = R.cov()
    elif decay:
        if (decay <= 0) or (decay >= 1):
            print('Warning: The decay parameter is not between 0 and 1.')
        n = X.shape[0]
        vec = array(X[0:1])
        cov = vec.T.dot(vec)
        for i in arange(1, n):
            vec = array(X[i:i + 1])
            cov = decay * cov + (1 - decay) * vec.T.dot(vec)
        cov = DataFrame(cov, index=cols, columns=cols)
    else:
        if len(cols) == 1:
            cov = var(array(X))
        else:
            cov = X.cov()
    if shrink:
        if (shrink <= 0) or (shrink >= 1):
            print('Warning: The shrinkage parameter is not between 0 and 1.')
        cov = ShrinkCovs(cov, delta=shrink)
    return DataFrame(cov, index=X.columns, columns=X.columns)
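The decay branches above implement an exponentially weighted (EWMA) covariance of raw outer products, cov_t = decay * cov_{t-1} + (1 - decay) * x_t.T x_t, with no demeaning. A minimal standalone sketch of that recursion in plain numpy (names here are illustrative, not from the original):

import numpy as np

def ewma_cov(X, decay):
    """EWMA covariance of the rows of X, mirroring the decay branch above."""
    if not 0 < decay < 1:
        print('Warning: The decay parameter is not between 0 and 1.')
    vec = X[0:1]                              # first observation as a 1 x k row
    cov = vec.T.dot(vec)
    for i in range(1, X.shape[0]):
        vec = X[i:i + 1]
        cov = decay * cov + (1 - decay) * vec.T.dot(vec)
    return cov

rng = np.random.default_rng(0)
print(ewma_cov(rng.standard_normal((100, 3)), decay=0.94))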
Example #2
 def test_cov_numeric_only(self, numeric_only):
     # when the dtypes of the pandas Series differ,
     # the resulting ndarray will have dtype=object,
     # so it needs to be handled properly
     df = DataFrame({"a": [1, 0], "c": ["x", "y"]})
     expected = DataFrame(0.5, index=["a"], columns=["a"])
     if numeric_only:
         result = df.cov(numeric_only=numeric_only)
         tm.assert_frame_equal(result, expected)
     else:
         with pytest.raises(ValueError,
                            match="could not convert string to float"):
             df.cov(numeric_only=numeric_only)
Example #3
 def test_cov_nullable_integer(self, other_column):
     # https://github.com/pandas-dev/pandas/issues/33803
     data = DataFrame({"a": pd.array([1, 2, None]), "b": other_column})
     result = data.cov()
     arr = np.array([[0.5, 0.5], [0.5, 1.0]])
     expected = DataFrame(arr, columns=["a", "b"], index=["a", "b"])
     tm.assert_frame_equal(result, expected)
Example #4
def _get_initialized_parameter(
        data: pd.DataFrame,
        n_observed: int,
        n_latent: int,
        biased: bool = False) -> Dict[str, torch.nn.Parameter]:
    """Similar to lavaan's simple."""
    scale = 1.0
    if biased:
        scale = (data.shape[0] - 1.0) / data.shape[0]

    return {
        "lambda_y":
        torch.nn.Parameter(
            torch.ones((n_observed, n_latent), dtype=torch.double)),
        "beta":
        torch.nn.Parameter(
            torch.zeros((n_latent, n_latent), dtype=torch.double)),
        "psi":
        torch.nn.Parameter(torch.eye(n_latent, dtype=torch.double)),
        "theta":
        torch.nn.Parameter(
            torch.from_numpy((data.cov().abs() * scale / 2.0).values *
                             np.eye(n_observed)).double().clamp(min=0.1)),
        "alpha":
        torch.nn.Parameter(torch.zeros((n_latent, 1), dtype=torch.double)),
        "nu":
        torch.nn.Parameter(
            torch.from_numpy(data.mean().values[:, None]).double()),
    }
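A quick usage sketch for the initializer above (the data and column names are made up here; assumes torch and the function's own imports are available):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
data = pd.DataFrame(rng.standard_normal((100, 3)), columns=["y1", "y2", "y3"])
params = _get_initialized_parameter(data, n_observed=3, n_latent=1)
print(params["theta"])  # diagonal init derived from data.cov(), clamped at 0.1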
Example #5
def max_sharpe_ratio_strat(
        df_oneyear_return: pd.DataFrame,
        risk_free_rate=default_risk_free_rate) -> tuple:

    df_1_mean_return = df_oneyear_return.mean()  # daily mean return over the one year
    df_1_cov = df_oneyear_return.cov()
    assets_num = len(df_oneyear_return.columns)
    bounds = tuple((0.0, 1.0) for i in range(assets_num))
    guess = np.array(assets_num * [1 / assets_num])

    def negative_sharpe(w, df_1_mean_return, df_1_cov, risk_free_rate):
        vol = np.sqrt(reduce(np.dot, [w.T, df_1_cov, w])) * np.sqrt(252)
        ret = np.dot(w, df_1_mean_return) * 252
        return -(ret - risk_free_rate) / vol

    args = (df_1_mean_return, df_1_cov, risk_free_rate)
    constraints = [{'type': 'eq', 'fun': lambda x: sum(x) - 1}]
    min_neg_sharpe = solver.minimize(fun=negative_sharpe,
                                     x0=guess,
                                     args=args,
                                     constraints=constraints,
                                     bounds=bounds,
                                     method='SLSQP')
    return -min_neg_sharpe.fun, min_neg_sharpe.x
Example #6
        def construct_portfolio(self):
            """Solve a cvxopt QP for the factor-style portfolio weights.

            :return: portfolio weights
            """
            pre_date_data = w.tdaysoffset(-self.window, self.date, "Period=M")
            pre_date = pre_date_data.Data[0][0].strftime("%Y-%m-%d")
            tradedays_data = w.tdays(pre_date, self.date, "Period=M")
            tradedayslist = tradedays_data[0]
            tradedays = [td.strftime("%Y-%m-%d") for td in tradedayslist]
            # extract the factor data
            style_return = DataFrame()
            for f in self.factors:
                f_data = []
                for dt in tradedays:
                    stockcodes = StockPool(dt).select_stock()
                    # use a separate name so the accumulator list is not overwritten
                    f_values = f(dt, stockcodes).getdata()
                    f_ret = FactorProcess.get_alpha(stockcodes, dt,
                                                    -1)  # select one month of alpha
                    df = DataFrame(data=[f_values, f_ret],
                                   columns=[f.windLabel, 'ret'])
                    long_only, long_short = FactorStyle.compute_style_return_month(
                        df, f.windLabel)
                    f_data.append(long_only)
                style_return[f.windLabel] = f_data
            S = matrix(style_return.cov().values)
            pbar = matrix(np.zeros_like(style_return.std().values))
            n = len(self.factors)
            G = matrix(0.0, (n, n))
            G[::n + 1] = -1.0
            h = matrix(0.0, (n, 1))
            A = matrix(1.0, (1, n))
            b = matrix(1.0)
            portfolio_weight = qp(S, -pbar, G, h, A, b)['x']
            return portfolio_weight
Example #7
    def test_cov(self, float_frame, float_string_frame):
        # min_periods no NAs (corner case)
        expected = float_frame.cov()
        result = float_frame.cov(min_periods=len(float_frame))

        tm.assert_frame_equal(expected, result)

        result = float_frame.cov(min_periods=len(float_frame) + 1)
        assert isna(result.values).all()

        # with NAs
        frame = float_frame.copy()
        frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
        frame.iloc[5:10, frame.columns.get_loc("B")] = np.nan
        result = frame.cov(min_periods=len(frame) - 8)
        expected = frame.cov()
        expected.loc["A", "B"] = np.nan
        expected.loc["B", "A"] = np.nan
        tm.assert_frame_equal(result, expected)

        # regular
        result = frame.cov()
        expected = frame["A"].cov(frame["C"])
        tm.assert_almost_equal(result["A"]["C"], expected)

        # exclude non-numeric types
        with tm.assert_produces_warning(
                FutureWarning, match="The default value of numeric_only"):
            result = float_string_frame.cov()
        expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
        tm.assert_frame_equal(result, expected)

        # Single column frame
        df = DataFrame(np.linspace(0.0, 1.0, 10))
        result = df.cov()
        expected = DataFrame(np.cov(df.values.T).reshape((1, 1)),
                             index=df.columns,
                             columns=df.columns)
        tm.assert_frame_equal(result, expected)
        df.loc[0] = np.nan
        result = df.cov()
        expected = DataFrame(
            np.cov(df.values[1:].T).reshape((1, 1)),
            index=df.columns,
            columns=df.columns,
        )
        tm.assert_frame_equal(result, expected)
Example #8
    def test_cov(self, float_frame, float_string_frame):
        # min_periods no NAs (corner case)
        expected = float_frame.cov()
        result = float_frame.cov(min_periods=len(float_frame))

        tm.assert_frame_equal(expected, result)

        result = float_frame.cov(min_periods=len(float_frame) + 1)
        assert isna(result.values).all()

        # with NAs
        frame = float_frame.copy()
        frame["A"][:5] = np.nan
        frame["B"][5:10] = np.nan
        result = frame.cov(min_periods=len(frame) - 8)
        expected = frame.cov()
        expected.loc["A", "B"] = np.nan
        expected.loc["B", "A"] = np.nan
        tm.assert_frame_equal(result, expected)

        # regular
        float_frame["A"][:5] = np.nan
        float_frame["B"][:10] = np.nan
        cov = float_frame.cov()

        tm.assert_almost_equal(cov["A"]["C"],
                               float_frame["A"].cov(float_frame["C"]))

        # exclude non-numeric types
        result = float_string_frame.cov()
        expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
        tm.assert_frame_equal(result, expected)

        # Single column frame
        df = DataFrame(np.linspace(0.0, 1.0, 10))
        result = df.cov()
        expected = DataFrame(np.cov(df.values.T).reshape((1, 1)),
                             index=df.columns,
                             columns=df.columns)
        tm.assert_frame_equal(result, expected)
        df.loc[0] = np.nan
        result = df.cov()
        expected = DataFrame(
            np.cov(df.values[1:].T).reshape((1, 1)),
            index=df.columns,
            columns=df.columns,
        )
        tm.assert_frame_equal(result, expected)
Example #9
def gmv(returns: pd.DataFrame):
    """
    Returns the weights of the Global Minimum Volatility portfolio
    given a DataFrame of returns
    """
    cov = returns.cov()
    n = cov.shape[0]
    return msr(0, np.repeat(1, n), cov)
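gmv delegates to an msr (max-Sharpe) helper with a zero risk-free rate and equal expected returns, under which the max-Sharpe portfolio coincides with the global minimum-variance one. A hedged sketch of the closed-form equivalent, w = inv(S) 1 / (1' inv(S) 1), illustrative only and not the repo's msr helper:

import numpy as np
import pandas as pd

returns = pd.DataFrame(np.random.randn(250, 4), columns=list("wxyz"))
cov = returns.cov().values
ones = np.ones(cov.shape[0])
w = np.linalg.solve(cov, ones)   # inv(S) 1
w /= w.sum()                     # normalize so the weights sum to 1
print(w, w.sum())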
Example #10
 def test_cov_ddof(self, test_ddof):
     # GH#34611
     np_array1 = np.random.rand(10)
     np_array2 = np.random.rand(10)
     df = DataFrame({0: np_array1, 1: np_array2})
     result = df.cov(ddof=test_ddof)
     expected_np = np.cov(np_array1, np_array2, ddof=test_ddof)
     expected = DataFrame(expected_np)
     tm.assert_frame_equal(result, expected)
Example #11
class CalWeight:
    def __init__(self, step, risk_aversion):
        self.risk_aversion = risk_aversion
        if step == 0:
            self.start = '2012-01-01'
            self.end = '2014-12-31'
        elif step == 1:
            self.start = '2012-01-01'
            self.end = '2015-2-28'
        elif step == 2:
            self.start = '2012-01-01'
            self.end = '2015-4-30'
        secIDs = [
            '000300.ZICN', '000905.ZICN', '399006.ZICN', 'SPX.ZIUS',
            '000012.ZICN', '000013.ZICN'
        ]
        self.rtn_table = DataFrame()
        for secID in secIDs:
            cp = self.get_return(secID)
            cp.name = secID
            self.rtn_table = pd.concat([self.rtn_table, cp], axis=1)
        self.rtn_table.fillna(0, inplace=True)
        self.cov_mat = self.rtn_table.cov() * 250
        self.exp_rtn = self.rtn_table.mean() * 250

    def get_return(self, ticker):
        tmp_lst = []
        fname = PERFIX + 'data_' + ticker + '.csv'
        with open(fname, 'r') as f:
            reader = csv.reader(f)
            for row in reader:
                tmp_lst.append(row)
        df = pd.DataFrame(tmp_lst[1:], columns=tmp_lst[0])
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index("Date")
        df = df[self.start:self.end]
        temp = df['Close'].astype('float64').pct_change().fillna(0.)
        return temp

    def get_weight(self):
        risk_aversion = self.risk_aversion
        P = risk_aversion * matrix(self.cov_mat.values)
        q = -1 * matrix(self.exp_rtn.values)
        G = matrix(
            np.vstack((np.diag(np.ones(len(self.exp_rtn))),
                       np.diag(-np.ones(len(self.exp_rtn))))))
        h = matrix(
            np.array([np.ones(len(self.exp_rtn)),
                      np.zeros(len(self.exp_rtn))
                      ]).reshape(len(self.exp_rtn) * 2, 1))
        A = matrix(np.ones(len(self.exp_rtn)), (1, len(self.exp_rtn)))
        b = matrix([1.0])
        solvers.options['show_progress'] = False
        sol = solvers.qp(P, q, G, h, A, b)
        return DataFrame(index=self.exp_rtn.index,
                         data=np.round(sol['x'], 2),
                         columns=['weight'])  # weights rounded to two decimal places
Example #12
 def get_portfolio_risk(cls, weights: list, ror: pd.DataFrame) -> float:
     """
     Computes the std of portfolio returns.
     """
     # cls.weights_sum_is_one(weights)
     if isinstance(ror, pd.Series):  # required for a single asset portfolio
         return ror.std()
     weights = np.array(weights)
     covmat = ror.cov()
     return math.sqrt(weights.T @ covmat @ weights)
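For reference, the same quadratic form sqrt(w' S w) evaluated directly on synthetic data (the column names and weights here are made up):

import numpy as np
import pandas as pd

ror = pd.DataFrame(np.random.randn(100, 3) / 100, columns=list("abc"))
w = np.array([0.5, 0.3, 0.2])
risk = np.sqrt(w.T @ ror.cov() @ w)   # same sqrt(w' S w) as above
print(risk)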
Example #13
    def myconnect(self):
        user_text1 = str(self.host_le.text())
        user_text2 = str(self.user_le.text())
        user_text3 = str(self.password_le.text())
        user_text4 = str(self.db_le.text())

        try:
            mcon = MySQLdb.connect(host=user_text1, user=user_text2, passwd=user_text3, db=user_text4)
            self.browser.setText("[*] Welcome, connection successful.")
            text, ok = QInputDialog.getText(self, "Table Name", "Enter table name:")
            if ok and text:
                tb_name = str(text)
                try:
                    sq_tb = pis.read_sql('select * from %s' % tb_name, mcon)
                    df = DataFrame(sq_tb)
                    mcon.close()
                    size = str(len(df))
                    stat_description = df.describe()
                    stats = str(stat_description)
                    kt = str(df.kurt())
                    skew = str(df.skew())
                    cov = str(df.cov())
                    corr = str(df.corr())
                    head = str(df.head())
                    tail = str(df.tail())
                    summation = str(stat_description.sum())
                    self.browser1.setText("Size: " +"%s " %size +"\n"\
                                          +"Statistics:" +"\n"\
                                          +" %s " %stats +"\n"\
                                          +"Kurt:" +"\n"\
                                          +"%s" %kt +"\n"\
                                          +"Skew:" +"\n"\
                                          +"%s" %skew +"\n"\
                                          +"Covarriance:" +"\n"\
                                          +"%s" %cov +"\n"\
                                          +"Correlation:" +"\n"\
                                          +"%s" %corr +"\n"\
                                          +"Summation:" +"\n"\
                                          +"%s" %summation +"\n"\
                                          +"Head:" +"\n"\
                                          +"%s" % head +"\n"\
                                          +"Tail:" +"\n"\
                                          +"%s" %tail)
                    self.browser.setText(stats)
                    self.host_le.clear()
                    self.user_le.clear()
                    self.password_le.clear()
                    self.db_le.clear()
                    
                except Exception as e:
                    self.browser.setText("[*] Ensure that the table name is correct and try again.")
        except Exception as e:
            self.browser.setText("Please specify correct connection details and try again")
Example #14
def epochs_sim_agg_returns_cov_market_data(returns: pd.DataFrame) -> pd.Series:
    """Computes the aggregated distribution of returns for a market.

    :param returns: dataframe with the simulated returns.
    :type returns: pd.DataFrame
    :return: series with the aggregated simulated returns.
    """

    function_name: str = epochs_sim_agg_returns_cov_market_data.__name__
    epochs_sim_tools.function_header_print_data(function_name, [""],
                                                "",
                                                "",
                                                "",
                                                sim=True)

    print("Size of time series and number of companies: ", returns.shape)

    cov: pd.DataFrame = returns.cov()
    # eig_vec:  eigenvector, eig_val: eigenvalues
    eig_val, eig_vec = np.linalg.eigh(cov)

    # rot: rotation, scale: scaling
    rot, scale = eig_vec, np.diag(1 / np.sqrt(eig_val))
    # trans: transformation matrix
    # trans = rot . scal
    trans = rot.dot(scale)

    trans_returns: pd.DataFrame = returns.dot(trans)
    trans_returns.columns = returns.columns

    one_col: List[pd.Series] = []

    for col in trans_returns.columns:

        one_col.append(trans_returns[col])

    agg_returns: pd.Series = pd.concat(one_col, ignore_index=True)

    # remove NaN and Inf
    agg_returns_list: List[float] = [
        x for x in agg_returns if not math.isnan(x) and not math.isinf(x)
    ]
    # filter out values greater than 10 or smaller than -10
    agg_returns_list = [x for x in agg_returns_list if -10 <= x <= 10]

    agg_returns_series: pd.Series = pd.Series(agg_returns_list)
    print(f"mean = {agg_returns_series.mean()}")
    print(f"std  = {agg_returns_series.std()}")

    del returns
    del trans_returns
    del one_col

    return agg_returns_series
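The eigh-based transformation above whitens the returns: multiplying by V diag(1/sqrt(lambda)) makes the sample covariance of the transformed series approximately the identity. A small self-contained check of that step (synthetic data, illustrative names):

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
returns = pd.DataFrame(rng.standard_normal((1000, 4)))
eig_val, eig_vec = np.linalg.eigh(returns.cov())
trans = eig_vec.dot(np.diag(1 / np.sqrt(eig_val)))
whitened = returns.dot(trans)
print(np.round(whitened.cov().values, 6))  # approximately the identity matrix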
Example #15
def pca(df: DataFrame, file_path: str,
        eigenvalues_condition: Callable[[float], bool]):
    """
    Transforma un dataset en otro con menos dimensiones mediante PCA y permite guardarlo en un archivo csv.
    Implementacion basada en el documento 'A tutorial on Principal Components Analysis' de Lindsay I Smith

    :param df: dataset con atributos solamente numericos y sin el atributo objetivo
    :param file_path: ruta relativa al archivo csv en donde se guardara el resultado
    :param eigenvalues_condition: funcion booleana para filtrar los valores propios (y con estos los vectores propios
        asociados) que se usaran para generar la matriz row_feature_vector (ver documento).
    """

    # the first step is skipped, assuming the data meets the preconditions

    # step two: subtract the means
    row_data_adjust = DataFrame()
    means = []
    for a in df.columns.values:
        means.append(df[a].mean())
    for (i, a) in enumerate(df.columns.values):
        row_data_adjust[a] = df[a] - means[i]

    # step three: compute the covariance matrix
    C = row_data_adjust.cov()

    # step four: compute the eigenvalues and eigenvectors of the covariance matrix
    U, Sigma, V = randomized_svd(C.to_numpy(),
                                 n_components=C.shape[0],
                                 n_iter=5,
                                 random_state=None)

    # step five: choose the components that form the feature vector
    order = (-Sigma).argsort()
    Sigma = Sigma[order]
    U = U[:, order]
    filtered_indices = [
        i for i in range(len(Sigma)) if eigenvalues_condition(Sigma[i])
    ]
    row_feature_vector = U[:, filtered_indices].transpose()

    # step six: derive the new dataset
    row_data_adjust = row_data_adjust.to_numpy()\
        .transpose()
    # noinspection PyUnresolvedReferences
    final_data = np.matmul(row_feature_vector, row_data_adjust)
    final_data = final_data.transpose()

    # save the result to a csv
    final_data = DataFrame(final_data)
    final_data.to_csv(file_path, index=False, encoding='utf-8')
Example #16
def annual_covar(excess_returns: pd.DataFrame) -> pd.DataFrame:
    "annualized covariance of excess returns"
    ann_covar = excess_returns.cov() * 252
    print('Condition number of annualized covariance matrix is:',
          np.linalg.cond(ann_covar))
    try:
        eigvals, __ = np.linalg.eig(ann_covar)
    except np.linalg.LinAlgError:
        print('Error in Eigen decomposition of covariance matrix')
        eigvals = []
        sys.exit(-1)
    if min(eigvals) <= 0:
        print('Error!  Negative eigenvalues in covariance matrix detected!')
        sys.exit(-1)
    return ann_covar
Example #17
def calc_kelly_leverages(securities: Set[str],
                         start_date: date,
                         end_date: date,
                         risk_free_rate: float = 0.04) -> Dict[str, float]:
    """Calculates the optimal leverages for the given securities and
    time frame. Returns a dict mapping each security to its calculated
    optimal leverage.

    Note: risk_free_rate is annualized
    """
    f = {}
    ret = {}
    excess_return = {}

    # Download the historical prices from Yahoo Finance and calculate the
    # excess return (return of security - risk free rate) for each security.
    for symbol in securities:
        try:
            hist_prices = get_historical_data(symbol,
                                              start=start_date,
                                              end=end_date,
                                              output_format='pandas')
        except IOError as e:
            raise ValueError(f'Unable to download data for {symbol}. '
                             f'Reason: {str(e)}')

        f[symbol] = hist_prices

        ret[symbol] = hist_prices['close'].pct_change()
        # risk_free_rate is annualized
        excess_return[symbol] = (ret[symbol] - (risk_free_rate / 252))

    # Create a new DataFrame based on the Excess Returns.
    df = DataFrame(excess_return).dropna()

    # Calculate the CoVariance and Mean of the DataFrame
    C = 252 * df.cov()
    M = 252 * df.mean()

    # Calculate the Kelly-Optimal Leverages using Matrix Multiplication
    F = inv(C).dot(M)

    # Return a list of (security, leverage) tuple
    return {
        security: leverage
        for security, leverage in zip(df.columns.values.tolist(), F)
    }
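A tiny synthetic check of the Kelly step F = inv(C) M from the function above (made-up tickers and returns; assumes numpy, pandas, and numpy.linalg.inv as used there):

import numpy as np
import pandas as pd
from numpy.linalg import inv

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(0.0005, 0.01, size=(252, 2)), columns=["AAA", "BBB"])
C = 252 * df.cov()    # annualized covariance of excess returns
M = 252 * df.mean()   # annualized mean excess returns
F = inv(C).dot(M)     # Kelly-optimal leverages
print(dict(zip(df.columns, F)))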
Example #19
def gen_pos_returns(weights: [float], returns: pd.DataFrame, type: str):
    if type == 'parametric':
        w = np.array(weights)
        mean = returns.mean().dot(weights)
        std = np.sqrt(w.T.dot(returns.cov()).dot(weights))

        # parametric calculations
        return 1 - norm.cdf(0, mean, std)

    elif type == 'historical':
        returns_weighted = (returns.dropna()) * weights
        total_returns = returns_weighted.apply(np.sum, axis=1).sort_values()
        array = np.asarray(total_returns)
        idx = (np.abs(array)).argmin()  # index of closest return = 0%
        prob = 1 - (idx) / len(array)  # 1 - probability return is less than 0%
        return prob

    else:
        return 'error'
Example #20
def generate_single_case(returns_df: pd.DataFrame, walk_length: int):
    assets_counts = len(returns_df.columns)
    cov = returns_df.cov()
    returns_mean = returns_df.mean()
    returns_std = returns_df.std()
    z_list = []

    for asset in returns_df.columns:
        mean = returns_mean[asset]
        std = returns_std[asset]
        standardized_returns = (returns_df[asset] - mean) / std
        z = np.random.choice(standardized_returns, walk_length)
        z_list.append([z])

    Z = np.concatenate(z_list)
    L = np.linalg.cholesky(cov)
    future_returns = np.full(
        (walk_length, assets_counts), returns_mean).T + np.dot(L, Z)
    return pd.DataFrame(future_returns.T, columns=returns_df.columns)
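Illustrative usage of the generator above: because the per-asset standardized draws are recorrelated with the Cholesky factor of the historical covariance, the simulated walk should roughly reproduce that covariance (synthetic history, made-up names):

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
hist = pd.DataFrame(
    rng.multivariate_normal([0.0, 0.0], [[1.0, 0.5], [0.5, 2.0]], size=500),
    columns=["a", "b"])
sim = generate_single_case(hist, walk_length=250)
print(hist.cov())
print(sim.cov())  # should be of similar magnitude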
Example #21
def define_matrix_space(features: int, data: pd.DataFrame) -> pd.DataFrame:
    # Keep original data for later use. Copy values to apply normalization
    # data_norm = data_normalizer.stretch_to_unary(data)
    data_norm = normalizer.Map(lambda col: (col - col.mean()) / col.std(),
                               data)

    # Eigenvectors for the covariance matrix
    np_vec = np.linalg.eig(data.cov())[1]

    # Dividing row-wise to normalize vectors
    np_matrix = np_vec / np_vec[0][:, None][::-1]

    # select the number of features desired after transformation
    np_matrix = np_matrix[:features].T

    # Rotating data points with respect to the eigenvectors
    rotated_points = np.matmul(data_norm, np_matrix)

    return pd.DataFrame(rotated_points, columns=data.columns[:features])
Example #22
def calc_kelly_leverages(securities,
                         start_date,
                         end_date,
                         risk_free_rate=0.04):
    """Calculates the optimal leverages for the given securities and time frame.
    Returns a list of (security, leverage) tuples with the calculated optimal leverages.

    Note: risk_free_rate is annualized
    """
    f = {}
    ret = {}
    excess_return = {}

    # Download the historical prices from Yahoo Finance and calculate the
    # excess return (return of security - risk free rate) for each security.
    for symbol in securities:
        try:
            hist_prices = web.DataReader(symbol, 'yahoo', start_date, end_date)
        except IOError as e:
            print('Unable to download data for %s. Reason: %s' %
                  (symbol, str(e)))
            return None

        f[symbol] = hist_prices

        ret[symbol] = hist_prices['Adj Close'].pct_change()
        # risk_free_rate is annualized
        excess_return[symbol] = ret[symbol] - (risk_free_rate / 252)

    # Create a new DataFrame based on the Excess Returns.
    df = DataFrame(excess_return).dropna()

    # Calculate the CoVariance and Mean of the DataFrame
    C = 252 * df.cov()
    M = 252 * df.mean()

    # Calculate the Kelly-Optimal Leverages using Matrix Multiplication
    F = inv(C).dot(M)

    # Return a list of (security, leverage) tuple
    return zip(df.columns.values.tolist(), F)
Example #23
def gen_cond_var(weights: [float], returns: pd.DataFrame, conf_level: float):

    # VaR from historical total returns:
    # indx = round((1-conf_level)*len(data.index))  # percentile calculation
    # return data.iloc[indx]

    ###  CVaR parametric  ###
    w = np.array(weights)
    mean = returns.mean().dot(weights)
    std = np.sqrt(w.T.dot(returns.cov()).dot(weights))
    # parametric calculations
    # there's a (1-conf_level)% probability that we lose at least
    # var percent of our total portfolio value in a day
    var = norm.ppf((1 - conf_level), mean, std)

    # expected value in the tail beyond VaR for a normal distribution with known mean and std
    cvar = .5 * mean * erf(
        (var - mean) / (math.sqrt(2) * std)) - std * math.exp(
            -(var - mean)**2 /
            (2 * std**2)) / math.sqrt(2 * math.pi) + .5 * mean
    return cvar
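An illustrative call with synthetic returns (the weights, confidence level, and data are made up; assumes math, scipy.stats.norm, and erf are in scope as the example requires):

import numpy as np
import pandas as pd

rng = np.random.default_rng(7)
returns = pd.DataFrame(rng.normal(0.0005, 0.01, size=(500, 2)), columns=["x", "y"])
print(gen_cond_var([0.6, 0.4], returns, conf_level=0.95))  # daily parametric CVaR estimate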
Example #24
def get_pca(features_standard: pd.DataFrame,
            var_threshold: float = 0.95) -> Tuple[pd.Series, pd.DataFrame]:
    """
    get principle components
    """
    
    # get our eigenvalues and sort in order of size
    eig_val, eig_vec = np.linalg.eigh(features_standard.cov())
    idx = eig_val.argsort()[::-1]
    eig_val, eig_vec = eig_val[idx], eig_vec[:, idx]

    # wrap as pandas objects
    eig_val = pd.Series(eig_val, index=[f"pc_{1 + i}" for i in range(eig_val.shape[0])], name="eig_vals")
    eig_vec = pd.DataFrame(eig_vec, index=features_standard.columns, columns=eig_val.index)
    eig_vec = eig_vec.loc[:, eig_val.index]

    # reduce dimensions by getting rid of small eigenvalues
    cumulative_var = eig_val.cumsum()/eig_val.sum()
    cutoff = cumulative_var.values.searchsorted(var_threshold)
    eig_val, eig_vec = eig_val.iloc[:cutoff + 1], eig_vec.iloc[:, :cutoff + 1]
    return eig_val, eig_vec
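Illustrative usage on standardized synthetic features (the shape and threshold are arbitrary):

import numpy as np
import pandas as pd

rng = np.random.default_rng(3)
X = pd.DataFrame(rng.standard_normal((300, 5)), columns=[f"f{i}" for i in range(5)])
eig_val, eig_vec = get_pca((X - X.mean()) / X.std(), var_threshold=0.95)
print(eig_val)          # eigenvalues kept to reach ~95% explained variance
print(eig_vec.shape)    # (n_features, n_kept_components)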
Example #25
def min_vol_strat(df_oneyear_return: pd.DataFrame) -> tuple:
    df_1_mean_return = df_oneyear_return.mean()  # daily mean return over the one year
    df_1_cov = df_oneyear_return.cov()
    assets_num = len(df_oneyear_return.columns)
    bounds = tuple((0.0, 1.0) for i in range(assets_num))
    guess = np.array(assets_num * [1 / assets_num])

    target_return = np.linspace(
        max(df_1_mean_return) * 252,
        min(df_1_mean_return) * 252, 50)

    def vol(w):
        return np.sqrt(reduce(np.dot, [w.T, df_1_cov, w])) * np.sqrt(252)

    min_vol_result = float('inf')
    min_vol_weight = []
    min_vol_return = []
    for i in target_return:
        constraints = [{
            'type': 'eq',
            'fun': lambda x: sum(x) - 1
        }, {
            'type': 'eq',
            'fun': lambda x: sum(x * df_1_mean_return) * 252 - i
        }]
        min_vol = solver.minimize(fun=vol,
                                  x0=guess,
                                  constraints=constraints,
                                  bounds=bounds,
                                  method='SLSQP')
        if min_vol.fun < min_vol_result:
            min_vol_result = min_vol.fun
            min_vol_weight = min_vol.x
            min_vol_return = i
        else:
            break

    return min_vol_return, min_vol_result, min_vol_weight
Example #26
        def construct_portfolio(self):
            """Solve for factor-style weights with cvxpy (pre-1.0 API).

            :return: normalized portfolio weights
            """
            pre_date_data = w.tdaysoffset(-self.window, self.date, "Period=M")
            pre_date = pre_date_data.Data[0][0].strftime("%Y-%m-%d")
            tradedays_data = w.tdays(pre_date, self.date, "Period=M")
            tradedayslist = tradedays_data[0]
            tradedays = [td.strftime("%Y-%m-%d") for td in tradedayslist]
            # extract the factor data
            style_return = DataFrame()
            for f in self.factors:
                f_data = []
                for dt in tradedays:
                    stockcodes = StockPool(dt).select_stock()
                    # use a separate name so the accumulator list is not overwritten
                    f_values = f(dt, stockcodes).getdata()
                    f_ret = FactorProcess.get_alpha(stockcodes, dt,
                                                    -1)  # select one month of alpha
                    df = DataFrame(data=[f_values, f_ret],
                                   columns=[f.windLabel, 'ret'])
                    long_only, long_short = FactorStyle.compute_style_return_month(
                        df, f.windLabel)
                    f_data.append(long_only)
                style_return[f.windLabel] = f_data
            S = style_return.cov().values
            n = len(self.factors)
            Sigma = S.T.dot(S)
            weight = Variable(n)
            gamma = Parameter(sign="positive")  # cvxpy < 1.0 API (nonneg=True in cvxpy >= 1.0)
            risk = quad_form(weight, Sigma)
            prob = Problem(Maximize(-gamma * risk),
                           [sum_entries(weight) == 1, weight >= 0])
            gamma.value = 1
            prob.solve()
            Weight = weight.value / np.sum(weight.value)
            return Weight
Example #27
def get_mode(arr):
    mode = []
    arr_appear = dict((a, arr.count(a)) for a in arr)  # count how often each element appears
    if max(arr_appear.values()) == 1:  # if the maximum count is 1
        return  # there is no mode
    else:
        for k, v in arr_appear.items():  # otherwise, the most frequent values are the modes
            if v == max(arr_appear.values()):
                mode.append(k)
    return mode

get_mode(a)

var(a)
std(a)

a=Series(a)
a.skew()
a.kurt()
a.describe()

df = DataFrame({'data1' : np.random.randn(5),
                'data2' : np.random.randn(5)})
df.cov()
df.corr()

### hypothesis testing
from scipy import stats as ss
df=DataFrame({'data':[10.1,10,9.8,10.5,9.7,10.1,9.9,10.2,10.3,9.9]})
ss.ttest_1samp(a = df, popmean = 10)
Example #28
    def test_corr_int(self):
        # dtypes other than float64 GH#1761
        df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})

        df3.cov()
        df3.corr()
Example #29
"""
    data = [
        1   2   2
        2   3   1
        3   3.5 0.5
    ]

    calculate var, cov, corr, cov-matrix, corr-matrix
"""

data = {'x1': [1, 2], 'x2': [2, 3], 'x3': [2, 1]}
A = DataFrame(data)
print(A)

data1 = Series([3, 3.5, 0.5], index=['x1', 'x2', 'x3'])
A = A.append(data1, ignore_index=True)  # DataFrame.append was removed in pandas 2.0; use pd.concat there
print('\n', A, '\n')

var_x1 = A['x1'].var()
var_x2 = A['x2'].var()
var_x3 = A['x3'].var()

print('var_x1: %f' % var_x1)
print('var_x2: %f' % var_x2)
print('var_x3: %f' % var_x3)

cov_matrix = A.cov()
print('\ncov_matrix:\n', cov_matrix)

corr_matrix = A.corr()
print('\ncorr_matrix:\n', corr_matrix)

print("\nA['x1'].corr(A['x2']): %f" % A['x1'].corr(A['x2']))
print("A['x1'].corr(A['x3']): %f" % A['x1'].corr(A['x3']))
Example #30
    },
    index=list('abcd'))
frame.ix['f'] = np.random.randn(4)
frame['loc'] = ['ST', 'MO'] * 3
frame.sort_index(axis=1)
frame.sort_values(by=['loc', 'STL'])
frame.rank(axis=0)
frame.rank(method='max')
um.order()
um.rank()
frame.add(frame2)
frame.corrwith(um)
frame.fillna(1, inplace=True)
um = frame['UM']
frame.corr()
frame.cov()
frame2.ix['f'] = np.random.randn(3)
frame.corrwith(frame2)
frame.corrwith(um)
frame.corrwith(um.to_frame())
frame.ix[:, 'Washu':'UMST'].apply(lambda x: x.mean())
frame.set_index('UM', drop=True, inplace=True)
keys = frame.index
frame.reset_index(level=keys)

df = DataFrame(np.random.randn(6, 5),
               columns=['Ohio', 'Dallas', 'Michigan', 'Miami', 'DC'],
               index=[['a', 'a', 'b', 'b', 'c', 'd'], [1, 2, 3, 1, 2, 3]])
df.index
df.ix['a']
df.sortlevel(level=0, axis=0)
Example #31
        try:
            hist_prices = web.DataReader(symbol, 'yahoo', start_date, end_date)
        except IOError as e:
            print('Unable to download data for %s. Reason: %s' % (symbol, str(e)))
            return None

        f[symbol] = hist_prices

        ret[symbol] = hist_prices['Adj Close'].pct_change()
        excess_return[symbol] = (ret[symbol] - (risk_free_rate / 252))  # risk_free_rate is annualized

    # Create a new DataFrame based on the Excess Returns.
    df = DataFrame(excess_return).dropna()

    # Calculate the CoVariance and Mean of the DataFrame
    C = 252 * df.cov()
    M = 252 * df.mean()

    # Calculate the Kelly-Optimal Leverages using Matrix Multiplication
    F = inv(C).dot(M)

    # Return a list of (security, leverage) tuple
    return zip(df.columns.values.tolist(), F)


def main():
    """Entry point of Kelly Criterion calculation."""

    print "Kelly Criterion calculation"
    args = docopt(__doc__, sys.argv[1:])
Example #32
for i in range(len(exchange_rates)-1):
    row = {}
    for tp, cur in zip(metadata['Time Period'], metadata['Currency:']):
        x1 = float(exchange_rates[tp][i])
        x2 = float(exchange_rates[tp][i+1])

        if cur == 'USD':
            x1 = 1.0 / x1
            x2 = 1.0 / x2

        # Returns are in units of %.
        row[tp] = 100 * (x1 - x2) / x2
    rows.append(row)

returns = DataFrame(data=rows, columns=list(metadata['Time Period']))
returns_cov = returns.cov()

# Means are the expected returns for each currency.
exp_returns = concat({'mean': returns.mean(), 'variance': returns.var()}, axis=1)

class CurrencyPortfolio(YhatModel):
    @preprocess(in_type=dict, out_type=dict)
    def execute(self, data):
        P = matrix(data['risk_aversion'] * returns_cov.values)
        q = matrix(-exp_returns['mean'].values)
        G = matrix(0.0, (len(q),len(q)))
        G[::len(q)+1] = -1.0
        h = matrix(0.0, (len(q),1))
        A = matrix(1.0, (1,len(q)))
        b = matrix(1.0)
Example #34
pd.Index

obj = Series([1, 2, 3])

obj.reindex()

data = DataFrame([[1, 2, 3], [4, 5, 6]])
data.drop()

np.argsort()

obj.rank()

obj.sort_values()

data.tail()

data.cov()

data.cov()

data.corr()

data.dropna()

data.loc

data.fillna()

data.unstack()
Example #35
def _calc_covariance_matrix(ts_df: DataFrame,
                            frequency: int = 252) -> DataFrame:
    return ts_df.cov() * frequency
Example #36
#rtn_table.head(5)

#rtn_table.mean()*250

#rtn_table.corr()
print(rtn_table.mean() * 250)
print(rtn_table.corr())

print("*************************************************")

from cvxopt import matrix, solvers

portfolio1 = [0, 1, 2, 4, 5]
portfolio2 = range(6)
cov_mat = rtn_table.cov() * 250
exp_rtn = rtn_table.mean() * 250


def cal_efficient_frontier(portfolio):

    if len(portfolio) <= 2 or len(portfolio) > 6:
        raise Exception('portfolio must be a list with length greater than 2 and less than 7!')
    #print(cov_mat)
    cov_mat1 = cov_mat.iloc[portfolio, portfolio]
    exp_rtn1 = exp_rtn.iloc[portfolio]
    max_rtn = max(exp_rtn1)
    min_rtn = min(exp_rtn1)
    risks = []
    returns = []
Example #37
print(df5)

import math


def int_float_squares(series):
    return pd.Series({"int_sq": series["int_col"] ** 2, "flt_sq": series["float_col"] ** 2})


print(df.apply(int_float_squares, axis=1))

### 7. Basic Stats ###

print(df.describe())
print(df.cov())
print(df.corr())

### 8. Merge and Join ###

print(df)
other = DataFrame({"str_col": ["a", "b"], "some_val": [1, 2]})
print(other)
print(pd.merge(df, other, on="str_col", how="inner"))
print(pd.merge(df, other, on="str_col", how="outer"))
print(pd.merge(df, other, on="str_col", how="left"))
print(pd.merge(df, other, on="str_col", how="right"))

### 9. Plot ###

plot_df = DataFrame(np.random.randn(1000, 2), columns=["x", "y"])
print "*"*15

print "Definimos de nuevo el dataframe"
df = pd.DataFrame(data={"A":[1,2], "B":[2.6,1.3]})
print df
print "añadimos columnas combinando las actuales"
df["C"] = df["A"]+df["B"]
df["D"] = df["A"]*3
df["E"] = np.sqrt(df["A"])
print df
print "*"*15
print "Datos disponibles de un dataframe"
print " descripcion del dataframe"
print df.describe()
print " covarianza "
print df.cov()
print " correlación "
print df.corr()
print "*"*15

print " Creamos otro dataframe con valores aleatorios (1000 filas y 2 columnas "
print " DataFrame(np.random.randn(1000,2),columns=['x','y'])"
plot_df = DataFrame(np.random.randn(1000,2),columns=['x','y'])
print plot_df
print "Mostramos las graficas"
plot_df.plot()
plot_df.hist()