Example #1
0
    def test_pct_change_periods_freq(self):
        """Check that ``pct_change(freq=...)`` agrees with ``pct_change(periods)``.

        Every business-day offset is compared against the same number of
        periods, with identical fill options passed to both calls.
        """
        # GH 7292
        frame = self.tsframe
        scenarios = [
            ('5B', 5, {}),
            ('3B', 3, {'fill_method': None}),
            ('3B', 3, {'fill_method': 'bfill'}),
            ('7B', 7, {'fill_method': 'pad', 'limit': 1}),
            ('7B', 7, {'fill_method': 'bfill', 'limit': 3}),
        ]
        for offset, n_periods, fill_kwargs in scenarios:
            by_freq = frame.pct_change(freq=offset, **fill_kwargs)
            by_periods = frame.pct_change(n_periods, **fill_kwargs)
            assert_frame_equal(by_freq, by_periods)

        # Same labels but no data: both spellings must still agree.
        blank = DataFrame(index=frame.index, columns=frame.columns)
        by_freq = blank.pct_change(freq='14B')
        by_periods = blank.pct_change(14)
        assert_frame_equal(by_freq, by_periods)
Example #2
0
    def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
        """Compare ``pct_change`` driven by ``freq`` with the ``periods`` form.

        The comparison is made twice: on ``self.tsframe`` and on a frame that
        shares its index and columns but holds no data.
        """
        # GH 7292
        fill_kwargs = {"fill_method": fill_method, "limit": limit}

        by_freq = self.tsframe.pct_change(freq=freq, **fill_kwargs)
        by_periods = self.tsframe.pct_change(periods, **fill_kwargs)
        assert_frame_equal(by_freq, by_periods)

        blank = DataFrame(index=self.tsframe.index, columns=self.tsframe.columns)
        blank_by_freq = blank.pct_change(freq=freq, **fill_kwargs)
        blank_by_periods = blank.pct_change(periods, **fill_kwargs)
        assert_frame_equal(blank_by_freq, blank_by_periods)
def replication_stats(df_price: pd.DataFrame, fund_name: str):
    """Tabulate how each price column compares with the ``fund_name`` column.

    Prices are first reduced to the first observation of every 7-day bin and
    all statistics are computed from those resampled prices.  ``sqrt(52)`` and
    ``52 / n`` are the scaling constants applied to the 7-day figures.

    :param df_price: price frame; its index must support ``resample`` and it
        must contain a ``fund_name`` column
    :param fund_name: label of the reference column
    :return: DataFrame with one row per price column and the columns
        'Correlation', 'Kendall tau', 'Tracking error', 'R-squared',
        'Sharpe ratio', 'Annual Return' and 'Maximum Drawdown'
    """
    weekly = df_price.resample('7D').first()
    returns_track = weekly.pct_change()
    returns_fund = weekly[fund_name].pct_change()

    # Return differences versus the fund, one row per price column.
    active = returns_track.T - returns_fund.values

    df = pd.DataFrame()
    df['Correlation'] = returns_track.corr(method="pearson")[fund_name]
    df['Kendall tau'] = returns_track.corr(method="kendall")[fund_name]
    df['Tracking error'] = np.sqrt(52) * active.std(axis=1)
    df['R-squared'] = 1 - active.var(axis=1) / returns_fund.var()
    df['Sharpe ratio'] = np.sqrt(52) * returns_track.mean() / returns_track.std()
    growth = weekly.iloc[-1] / weekly.iloc[0]
    df['Annual Return'] = growth ** (52 / len(weekly.index)) - 1
    # max_drawdown is defined outside this block; it receives the raw values.
    df['Maximum Drawdown'] = max_drawdown(weekly.values)
    return df
Example #4
0
def calculate_daily_return(data: pd.DataFrame) -> pd.DataFrame:
    """Return the one-period percentage change of ``data`` without its first row.

    The first row of a percentage change has no predecessor, so it is dropped.

    Example:
        >>> calculate_daily_return(data=pd.DataFrame([1,2,3,4]))
    """
    changes = data.pct_change(periods=1)
    return changes.iloc[1:]
Example #5
0
def get_stocks(tickers, market, start_date, end_date, frequency):
    """Collect resampled adjusted-close prices and their percentage changes.

    The first column is the market series, followed by one column per ticker.

    Args:
        tickers: ticker symbols to download.
        market: 'SP500' (downloaded as 'VFINX'), 'TA100'/'TA25' (obtained
            through ``get_index_price``) or any other downloadable symbol.
        start_date: start date as 'dd/mm/YYYY'.
        end_date: end date as 'dd/mm/YYYY'.
        frequency: 'Weekly' or 'Monthly'.

    Returns:
        tuple: ``(prices, changes)`` where ``changes`` is
        ``prices.pct_change()`` without its first row.
    """
    # Resampling rule for every supported frequency label
    FREQ_DICT = {
        'Weekly': 'W-FRI',
        'Monthly': 'M',
    }

    start_yahoo = datetime.datetime.strptime(start_date, '%d/%m/%Y')
    end_yahoo = datetime.datetime.strptime(end_date, '%d/%m/%Y')

    def adj_close(symbol):
        # Adjusted close series for one symbol over the requested window
        return web.get_data_yahoo(symbol, start_yahoo, end_yahoo)['Adj Close']

    # Market portfolio
    if market in ('TA100', 'TA25'):
        index_prices = get_index_price(index_id=market, start_date=start_date,
                                       end_date=end_date, frequency=frequency)
        prices = index_prices.resample(FREQ_DICT[frequency], how='last')
    else:
        # 'SP500' is downloaded via 'VFINX' but keeps its own column label
        symbol = 'VFINX' if market == 'SP500' else market
        prices = DataFrame(
            adj_close(symbol).resample(FREQ_DICT[frequency], how='last',
                                       fill_method='ffill'),
            columns=[market])

    # Stock prices, appended column by column
    for ticker in tickers:
        get_df_ticker = DataFrame(
            adj_close(ticker).resample(FREQ_DICT[frequency], how='last'),
            columns=[ticker])
        prices = pd.concat([prices, get_df_ticker], join='outer', axis=1)

    changes = prices.pct_change()

    return prices, changes[1:]
Example #6
0
def gen_fitness_value(weights: [float], universe_data: pd.DataFrame):
    """Score a weight vector as ``VaR + entropy / max_entropy + P(positive)``.

    The three terms come from ``gen_cond_var``, ``gen_entropy`` and
    ``gen_pos_returns`` (defined outside this block), all evaluated on the
    percentage changes of ``universe_data``.

    :param weights: portfolio weights, forwarded to the helper functions
    :param universe_data: price data of the universe
    :return: fitness score rounded to 5 decimals
    """
    # portfolio returns
    returns = universe_data.pct_change()

    # CVaR
    tail_risk = gen_cond_var(weights, returns, .95)

    # entropy: reference values of a uniform distribution over the universe
    unique_count = returns.nunique().sum()  # unique returns in the universe
    value_range = returns.max().max() - returns.min().min()
    uniform_p = 1 / (value_range + 1)  # discrete uniform probability
    # Shannon maximum is computed but does not enter the score below.
    max_entropy_shannon = -unique_count * uniform_p * math.log(uniform_p, 2)
    max_entropy_renyi = math.log(unique_count * (uniform_p**2), 2)
    entropy = gen_entropy(weights, returns, 'Renyi')

    # returns
    prob_pos_returns = gen_pos_returns(weights, returns, 'historical')

    # The original author's note: every objective is a unitless fraction, so
    # the terms carry roughly equal weight (theoretical max 0 + 1 + 1 = 2).
    fitness_score = (tail_risk
                     + entropy / max_entropy_renyi
                     + prob_pos_returns)
    return round(fitness_score, 5)
Example #7
0
def calc_info_ratio(data: pd.DataFrame) -> pd.Series:
    """Return annualised mean return over annualised volatility, per column.

    Daily returns are the one-period percentage changes of ``data`` without
    the first (undefined) row.  The mean is scaled by 252 and the population
    standard deviation (``ddof=0``, as ``np.std`` uses) by ``sqrt(252)``.

    Args:
        data: price data, one column per security.

    Returns:
        pd.Series: one ratio per column of ``data``.
    """
    daily_rtn = data.pct_change(1).iloc[1:]
    # Reduce column-wise explicitly: np.mean/np.std on a DataFrame call the
    # reductions with axis=None, whose meaning differs between pandas versions
    # (per-column result vs. a single scalar over the whole frame).
    annual_rtn = daily_rtn.mean(axis=0) * 252
    ann_vol = daily_rtn.std(axis=0, ddof=0) * np.sqrt(252)
    return annual_rtn / ann_vol
Example #8
0
def beta(ret_carteira: pd.DataFrame, ret_ibvsp: pd.DataFrame) -> float:
    """Compute the portfolio beta from an OLS regression on the Ibovespa.

    The portfolio returns are regressed on a constant plus the percentage
    change of ``ret_ibvsp``; the slope coefficient is returned.

    Args:
        ret_carteira (pd.DataFrame): daily returns of the portfolio.
        ret_ibvsp (pd.DataFrame): Ibovespa data.  NOTE(review): the original
            docstring calls this daily returns, yet the code applies
            ``pct_change()`` to it, which suggests price levels are
            expected -- confirm with callers.

    Returns:
        float: beta.
    """
    ret_carteira = ret_carteira.dropna()
    ret_ibvsp = ret_ibvsp.pct_change().dropna()

    # Keep only the dates present in both series.
    df = pd.concat(
        [ret_carteira, ret_ibvsp],
        axis=1,
        join='inner'
    )

    Y = df.iloc[:, 0]
    X = sm.add_constant(df.iloc[:, 1])

    fitted = sm.OLS(Y, X).fit()
    # params is a label-indexed Series (constant first, slope second); use
    # explicit positional access instead of the deprecated integer-key
    # fallback of ``params[1]``.
    return fitted.params.iloc[1]
Example #9
0
def calculate_financials(data:pd.DataFrame, rf:float=0.005)->pd.DataFrame:
    """Summarise every column of ``data`` in one row of statistics.

    Output columns: 'Days', 'Start', 'End', 'R' (total return), 'AR'
    (annualised return), 'Vol', 'AVol' (volatility scaled by sqrt(252)),
    'AdjR' (AR / AVol), 'MDD', 'MDD_Date' (both from ``max_drawdown``, defined
    outside this block) and 'SR' ((AR - rf) / AVol).

    :param data: price frame with a datetime-like index
    :param rf: rate subtracted from the annualised return for 'SR'
    :return: DataFrame indexed by the columns of ``data``
    """
    first_stamp, last_stamp = data.index[0], data.index[-1]
    invest_period = (last_stamp - first_stamp).days
    start_label = first_stamp.strftime("%Y-%m-%d")
    end_label = last_stamp.strftime("%Y-%m-%d")

    # Constant per-column series so they line up with the other statistics.
    invest_period_srs = pd.Series({col: invest_period for col in data.columns})
    start_srs = pd.Series({col: start_label for col in data.columns})
    end_srs = pd.Series({col: end_label for col in data.columns})

    R = (data.iloc[-1] - data.iloc[0]) / data.iloc[0]
    AR = (1 + R) ** (365.25 / invest_period) - 1

    # Period returns without the undefined first row.
    period_returns = data.pct_change()
    period_returns = period_returns.drop(period_returns.index[0])
    Vol = period_returns.std()
    AVol = np.sqrt(252) * Vol
    AdjR = AR / AVol
    MDD, MDD_dates = max_drawdown(data)

    # Original author's note on the default rf: HKD interest settlement rate
    # at 11:15 a.m. Hong Kong time on 1/11/2018 -- overnight: 0.50000.
    SR = (AR - rf) / AVol

    pieces = [invest_period_srs, start_srs, end_srs, R, AR, Vol, AVol, AdjR,
              MDD, MDD_dates, SR]
    df = pd.concat(pieces, axis=1)
    df.columns = ['Days', 'Start', 'End', 'R', 'AR', 'Vol', 'AVol', 'AdjR', 'MDD', 'MDD_Date', 'SR']
    return df
Example #10
0
def get_returns(
    df: pd.DataFrame,
    return_type: ReturnType = "log",
) -> pd.DataFrame:
    """Calculates returns on the securities in ``df``.

    Args:
        df: Pandas dataframe with the prices used to calculate returns.
        return_type: Either `log`, `simple` or `diff` to specify how returns
            are calculated.

    Returns:
        Pandas dataframe of returns whose columns are a two-level MultiIndex
        ``("adj_return", <security>)`` with level names ``series`` and
        ``security``.  Rows containing missing values are dropped.

    Raises:
        ValueError: If ``return_type`` is ``None`` or is not one of the
            supported return types.
    """

    # Previously a None or unhandled type left `returns` unassigned and the
    # function failed later with an UnboundLocalError.
    if return_type is None:
        raise ValueError("return_type must be `log`, `simple` or `diff`, not None")
    return_type = ReturnType(return_type)

    if return_type == ReturnType.LOG:
        returns = (df / df.shift(1)).apply(np.log).dropna()
    elif return_type == ReturnType.SIMPLE:
        returns = df.pct_change().dropna()
    elif return_type == ReturnType.DIFF:
        returns = df.diff().dropna()
    else:
        raise ValueError(f"Unsupported return_type: {return_type!r}")

    returns.columns = pd.MultiIndex.from_tuples(
        tuples=[("adj_return", security) for security in returns.columns],
        names=["series", "security"],
    )

    return returns
Example #11
0
def capm(y: pd.Series, bases: pd.DataFrame, rf=0., fee=0.):
    """Regress the excess returns of ``y`` on the excess returns of ``bases``.

    ``rf`` and ``fee`` are divided by ``_freq(y.index)`` (defined outside
    this block; presumably the number of periods per year -- confirm).

    :param y: price series of the portfolio
    :param bases: price series of the explanatory assets, one per column
    :param rf: risk-free rate, before division by the frequency
    :param fee: fee, before division by the frequency
    :return: dict with keys 'alpha', 'betas', 'cumproxy', 'model', 'residual'
    """
    freq = _freq(y.index)
    rf = rf / freq
    fee = fee / freq

    y_returns = y.pct_change()
    R = y_returns - rf
    R.name = y.name
    R_base = bases.pct_change().sub(rf, axis=0)

    # CAPM:
    # R = alpha + rf + beta * (Rm - rf)
    formula = f"Q('{y.name}') ~ {'+'.join(bases.columns)}"
    model = OLS.from_formula(formula, R_base.join(R)).fit()

    alpha = model.params['Intercept'] * freq
    betas = model.params[bases.columns]

    # reconstruct artificial portfolio
    unallocated = 1 - betas.sum()
    proxy = R_base @ betas + unallocated * (rf + fee)
    cumproxy = (1 + proxy).cumprod()

    # residual portfolio
    r = y_returns - cumproxy.pct_change()
    residual = (1 + r).cumprod()

    return dict(
        alpha=alpha,
        betas=betas,
        cumproxy=cumproxy,
        model=model,
        residual=residual,
    )
    def risk_fraction(self, data: pd.DataFrame, n: int = 3):
        """
        Share of the total explained variance carried by the first ``n``
        principal components (see ref: formula (6) of main paper).

        The prices are turned into log returns, passed through
        ``self.sc.fit_transform`` and then ``self.pca.fit_transform``; the
        transformed data is kept on ``self.transformed_data``.

        :param data: (pd.DataFrame) end of month prices
            shape = (n_samples, p_shares)
        :param n: (int) Number of principal components (3 by default)
            assumes user has chosen the best n
        :return: (float)
        """
        # Remember the labels before the frame is replaced by plain arrays
        labels = list(data)

        # Log returns, with rows containing missing values removed
        log_returns = np.log(1 + data.pct_change())
        scaled = self.sc.fit_transform(log_returns.dropna())
        components = self.pca.fit_transform(scaled)
        self.transformed_data = pd.DataFrame(components, columns=labels)

        variances = self.pca.explained_variance_

        # Total risk of system
        system_risk = np.sum(variances)

        # Risk associated with first n principal components
        return variances[:n].sum() / system_risk
def monte_carlo(period: int, n_inc: int, n_sim: int, type: str,
                data: pd.DataFrame, assets: List[str]):
    """Run a Monte Carlo simulation for each asset and join the results.

    The mean and standard deviation of each asset's percentage changes and
    its last price are forwarded, together with ``period``, ``n_inc``,
    ``n_sim`` and the asset name, to the simulator (``gbm``, defined outside
    this block).

    :param period: forwarded to the simulator
    :param n_inc: forwarded to the simulator
    :param n_sim: forwarded to the simulator
    :param type: simulation kind; only 'GBM' is supported
    :param data: price frame with one column per asset
    :param assets: names of the columns to simulate
    :return: joined simulation frames, an empty frame when ``assets`` is
        empty, or ``None`` (after printing a message) when an asset is not
        available in ``data``
    :raises ValueError: if ``type`` is not a supported simulation kind
    """
    df = pd.DataFrame()
    returns = data.pct_change()
    returns_mean = returns.mean()  # daily mean
    returns_std = returns.std()  # daily std

    for name in assets:
        # retrieve asset mean and std
        try:
            asset_mean = returns_mean[name]
            asset_std = returns_std[name]
            # explicit positional access; [-1] on a labelled Series is a
            # label lookup in recent pandas
            asset_last_price = data[name].iloc[-1]
        except (KeyError, IndexError):
            print('asset not in data frame')
            return None

        # switch for monte carlo simulations
        # (compare by value: `is` on a string literal tests identity)
        if type == 'GBM':
            temp = gbm(asset_last_price, asset_mean, asset_std, period, n_inc,
                       n_sim, name)
        else:
            raise ValueError("unsupported simulation type: %r" % (type,))

        # append asset simulation to final data frame
        if df.empty:
            df = temp
        else:
            df = df.join(temp)

    return df
Example #14
0
def capm(y: pd.Series, bases: pd.DataFrame, rf=0.0, fee=0.0):
    """Fit ``R = alpha + beta * R_base`` on excess returns and build a proxy.

    ``rf`` and ``fee`` are divided by ``_freq(y.index)`` (defined outside
    this block; presumably the number of periods per year -- confirm).  Rows
    with missing values are dropped by the regression (``missing="drop"``).

    :param y: price series of the portfolio
    :param bases: price series of the explanatory assets, one per column
    :param rf: risk-free rate, before division by the frequency
    :param fee: fee, before division by the frequency
    :return: dict with keys "alpha", "betas", "cumproxy", "model", "residual"
    """
    freq = _freq(y.index)
    rf = rf / freq
    fee = fee / freq

    y_returns = y.pct_change()
    R = y_returns - rf
    R.name = y.name
    R_base = bases.pct_change().sub(rf, axis=0)

    # CAPM:
    # R = alpha + rf + beta * (Rm - rf)
    design = R_base.assign(Intercept=1)
    model = OLS(R, design, missing="drop").fit()

    alpha = model.params["Intercept"] * freq
    betas = model.params[bases.columns]

    # reconstruct artificial portfolio
    unallocated = 1 - betas.sum()
    proxy = R_base @ betas + unallocated * (rf + fee)
    cumproxy = (1 + proxy).cumprod()

    # residual portfolio
    r = y_returns - cumproxy.pct_change()
    residual = (1 + r).cumprod()

    return dict(
        alpha=alpha,
        betas=betas,
        cumproxy=cumproxy,
        model=model,
        residual=residual,
    )
Example #15
0
 def _getDailyReturns(self, a_df: DataFrame = DataFrame()) -> DataFrame:
     # == (self._data / self._data.shift(1))-1
     #new_df: DataFrame = a_df.pct_change(1)
     new_df: DataFrame = a_df.pct_change()
     new_df.iloc[0, :] = 0
     new_df.columns = new_df.columns.str.replace(self._column,
                                                 'DailyReturns')
     return new_df
Example #16
0
def returns(prices: pd.DataFrame, which: str = 'daily', period: str = 'a'):
    """Return the returns of ``prices`` in the form selected by ``which``.

    Ex:
        - which = 'daily' (period-over-period returns)
        - which = 'monthly' (returns compounded per calendar month)
        - which = 'annual' (returns compounded per calendar year)
        - which = 'total' (total change over the whole sample)
        - which = 'acm' (cumulative returns)

    Args:
        prices (pd.DataFrame): closing prices with a datetime index.
        which (str, optional): kind of return wanted. Default: 'daily'.
        period (str, optional): only used when which = 'total'. 'm' or 'a'
            rescales the total return to a monthly/annual rate assuming 252
            rows per year; any other value returns the unscaled total
            return. Default: 'a'.

    Returns:
        pd.DataFrame or pd.Series.

    Raises:
        TypeError: if ``which`` is not one of the supported kinds.
    """
    r = prices.pct_change().dropna()
    if which == 'daily':
        return r
    elif which == 'monthly':
        # result has a (year, month) MultiIndex
        # np.log1p(r) = np.log(1 + r)
        # np.expm1(r) = np.exp(r) - 1
        m_rets = r.groupby([r.index.year, r.index.month
                            ]).apply(lambda x: np.expm1(np.log1p(x).sum()))

        # rebuild the index as monthly periods; a concrete list is assigned
        # rather than a lazy map object
        m_rets.index = [
            dt.strptime(f'{year}-{month}', '%Y-%m')
            for year, month in m_rets.index
        ]
        m_rets.index = m_rets.index.to_period('M')
        return m_rets
    elif which == 'annual':
        a_rets = r.groupby(
            r.index.year).apply(lambda x: np.expm1(np.log1p(x).sum()))

        a_rets.index = pd.to_datetime(a_rets.index.astype(str)).to_period('Y')
        return a_rets
    elif which == 'total':
        rets = (prices.iloc[-1] - prices.iloc[0]) / prices.iloc[0]

        # Unknown periods deliberately yield the unscaled total return.
        if period not in ('m', 'a'):
            return rets

        n_days = prices.shape[0]
        n_years = n_days / 252
        if period == 'm':
            return (1 + rets)**(1 / (12 * n_years)) - 1
        return (1 + rets)**(1 / n_years) - 1
    elif which == 'acm':
        return (1 + r).cumprod()
    raise TypeError(
        "Tipo de retorno inválido: which -> 'daily', 'monthly', 'annual', 'total' ou 'acm'."
    )
Example #17
0
    def test_pct_change_shift_over_nas(self):
        """``pct_change`` over a gap: the missing row stays NaN and the row
        after it is measured against the last valid value."""
        values = Series([1., 1.5, np.nan, 2.5, 3.])
        frame = DataFrame({'a': values, 'b': values})

        result = frame.pct_change()

        expected_col = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2])
        expected_frame = DataFrame({'a': expected_col, 'b': expected_col})
        assert_frame_equal(result, expected_frame)
Example #18
0
    def test_pct_change_shift_over_nas(self):
        """Two identical columns with one missing value: the gap row is NaN
        in the result and the following row uses the last valid value."""
        column = Series([1., 1.5, np.nan, 2.5, 3.])
        frame = DataFrame({'a': column, 'b': column})

        observed = frame.pct_change()

        wanted = Series([np.nan, 0.5, np.nan, 2.5 / 1.5 - 1, .2])
        wanted_frame = DataFrame({'a': wanted, 'b': wanted})
        assert_frame_equal(observed, wanted_frame)
Example #19
0
def get_growth_rates_df(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Compute row-over-row growth rates without filling missing values.

    :param dataframe: the original data frame, with missing values
    :return: growth rates of the dataset (nan: infinite or unavailable)
    """
    raw_rates = dataframe.pct_change(fill_method=None)
    # Infinite rates (division by zero) are reported as missing.
    return raw_rates.replace([np.inf, -np.inf], np.nan)
def make_stats_maxence(df_price: pd.DataFrame):
    """Compare the returns of the first two price columns.

    :param df_price: price frame with at least two columns
    :return: tuple of ``stats.describe`` of the returns and three formatted
        strings for the paired t-test, the two-sample KS test and Kendall's
        tau between the first two return columns
    """
    df_return = df_price.pct_change().dropna()
    summary = stats.describe(df_return)

    first = df_return.iloc[:, 0]
    second = df_return.iloc[:, 1]
    t_tstat, p_tstat = stats.ttest_rel(first, second)  # paired t-test
    t_KS, p_KS = stats.ks_2samp(first, second)  # small p: different distributions
    tau, p_tau = stats.kendalltau(first, second)  # Kendall's tau

    t_line = "t test: t = %g  p = %g" % (t_tstat, p_tstat)
    ks_line = "KS test: t = %g  p = %g" % (t_KS, p_KS)
    tau_line = "KendallTau: t = %g  p = %g" % (tau, p_tau)
    return summary, t_line, ks_line, tau_line
Example #21
0
    def test_pct_change_shift_over_nas(self):
        """``pct_change`` across a gap: the missing row is reported as 0.0
        and the next row is measured against the last valid value."""
        column = Series([1.0, 1.5, np.nan, 2.5, 3.0])
        frame = DataFrame({"a": column, "b": column})

        observed = frame.pct_change()

        wanted = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
        wanted_frame = DataFrame({"a": wanted, "b": wanted})
        assert_frame_equal(observed, wanted_frame)
Example #22
0
def daily_returns(data: pd.DataFrame) -> pd.DataFrame:
    """Percentage change of ``data`` with empty rows and infinities removed.

    :Input:
     :data: ``pandas.DataFrame`` with daily stock prices
    :Output:
     :ret: a ``pandas.DataFrame`` of the percentage changes; rows in which
         every value is missing are dropped and infinite values become NaN.
    """
    changes = data.pct_change()
    populated = changes.dropna(how="all")
    return populated.replace([np.inf, -np.inf], np.nan)
Example #23
0
    def test_pct_change_periods_freq(self, freq, periods, fill_method, limit):
        """``pct_change`` must give the same frame whether the step is given
        as ``freq`` or as ``periods``, for data and for an empty frame."""
        # GH 7292
        options = dict(fill_method=fill_method, limit=limit)

        rs_freq = self.tsframe.pct_change(freq=freq, **options)
        rs_periods = self.tsframe.pct_change(periods, **options)
        assert_frame_equal(rs_freq, rs_periods)

        # Same labels, no values
        empty_ts = DataFrame(index=self.tsframe.index,
                             columns=self.tsframe.columns)
        rs_freq = empty_ts.pct_change(freq=freq, **options)
        rs_periods = empty_ts.pct_change(periods, **options)
        assert_frame_equal(rs_freq, rs_periods)
Example #24
0
def returns(df: pd.DataFrame, which: str='daily', period: str='a'):
    """Return the returns of ``df`` in the form selected by ``which``.

    - which = 'daily': ``df.pct_change().dropna()``;
    - which = 'total': ``(df.iloc[-1] - df.iloc[0]) / df.iloc[0]``, returned
      as is for period = 'a' and rescaled for period = 'd' or 'm' using
      ``time_fraction`` (defined outside this block);
    - which = 'acm': cumulative returns
      ``(1 + df.pct_change().dropna()).cumprod()``.

    Args:
        df (pd.DataFrame): prices, with a datetime index.
        which (str, optional): 'daily', 'total' or 'acm'. Default: 'daily'.
        period (str, optional): 'd', 'm' or 'a'; only used when
            which = 'total'. Default: 'a'.

    Returns:
        pd.DataFrame or pd.Series: daily returns (dataframe), total returns
        (series) or cumulative returns (dataframe).

    Raises:
        TypeError: if ``which`` or ``period`` is not a supported value.
    """
    if which == 'daily':
        return df.pct_change().dropna()
    elif which == 'total':
        s = (df.iloc[-1] - df.iloc[0]) / df.iloc[0]

        if period == 'a':
            return s

        start = df.index[0].strftime('%d/%m/%Y')
        end = df.index[-1].strftime('%d/%m/%Y')

        if period == 'd':
            s = (1 + s) ** (1/252) -1
            return (1 + s) ** (1 / time_fraction(start, end, 'd')) - 1
        elif period == 'm':
            s = (1 + s) ** (1/12) - 1
            return (1 + s) ** (1 / time_fraction(start, end, 'm')) - 1
        # An unknown period used to fall through to the `which` error below.
        raise TypeError("Período inválido: 'd', 'm' ou 'a'.")
    elif which == 'acm':
        return (1 + df.pct_change().dropna()).cumprod()
    # A bare string cannot be raised; wrap the message in a real exception.
    raise TypeError("Tipo de retorno inválido: which -> 'daily', 'total' ou 'acm'.")
Example #25
0
def returns(prices: pd.DataFrame, which: str='daily', period: str='a', scaled: bool=True):
    """Return the returns of ``prices`` in the form selected by ``which``.

    - which = 'daily': ``prices.pct_change().dropna()``;
    - which = 'total': ``(prices.iloc[-1] - prices.iloc[0]) / prices.iloc[0]``,
      optionally rescaled to a monthly or annual rate;
    - which = 'acm': cumulative returns
      ``(1 + prices.pct_change().dropna()).cumprod()``.

    Args:
        prices (pd.DataFrame): closing prices.
        which (str, optional): 'daily', 'total' or 'acm'. Default: 'daily'.
        period (str, optional): 'm' or 'a'; only used when which = 'total'
            and ``scaled`` is true. Default: 'a'.
        scaled (bool, optional): when false, the total return is returned
            without rescaling. Default: True.

    Returns:
        pd.DataFrame or pd.Series: daily returns (dataframe), total returns
        (series) or cumulative returns (dataframe).

    Raises:
        TypeError: if ``which`` is unknown, or ``period`` is unknown while
            scaling a total return.
    """
    if which == 'daily':
        return prices.pct_change().dropna()
    if which == 'acm':
        return (1 + prices.pct_change().dropna()).cumprod()
    if which != 'total':
        raise TypeError("Tipo de retorno inválido: which -> 'daily', 'total' ou 'acm'.")

    first_row, last_row = prices.iloc[0], prices.iloc[-1]
    rets = (last_row - first_row) / first_row
    if not scaled:
        return rets

    # The sample length is converted to years assuming 252 rows per year.
    n_years = prices.shape[0] / 252
    if period == 'm':
        return (1 + rets) ** (1 / (12 * n_years)) - 1
    if period == 'a':
        return (1 + rets) ** (1 / n_years) - 1
    raise TypeError("Período inválido: 'm' ou 'a'.")
Example #26
0
 def _setSimpleReturnsTimely(self,
                             a_letter: str = '',
                             a_df: DataFrame = DataFrame()) -> DataFrame:
     if a_letter == 'W':
         return a_df.resample('W').ffill().pct_change()  #.to_frame()
     elif a_letter == 'M':
         return a_df.resample('M').ffill().pct_change()  #.to_frame()
     elif a_letter == 'Q':
         return a_df.resample('Q').ffill().pct_change()  #.to_frame()
     elif a_letter == 'A':
         return a_df.resample('A').ffill().pct_change()  #.to_frame()
     else:
         return a_df.pct_change()  #.to_frame()
Example #27
0
def ran_ports_avg_dly_ret(df,size_list,ports_names):
    """Average returns of randomly drawn portfolios.

    For every entry ``s`` of ``size_list`` a portfolio of ``s`` distinct
    columns is sampled from the percentage changes of ``df``; the result has
    one column per portfolio holding the row-wise mean of its members,
    labelled with ``ports_names``.
    """
    df_r = df.pct_change()[1:]
    # Draw the portfolios first, in the order given by size_list
    samples = [df_r.sample(n=s, replace=False, axis=1) for s in size_list]

    port_rets = pd.DataFrame()
    for position, sample in enumerate(samples):
        port_rets[position] = sample.mean(axis=1)
    port_rets.columns = ports_names
    return port_rets
Example #28
0
def compute_vol(df: pd.DataFrame,
                span: int=100) -> pd.DataFrame:
    '''
    Compute period volatility of returns as exponentially weighted
    moving standard deviation.

    Note: missing prices in ``df`` are forward-filled IN PLACE, so the
    caller's frame is modified (this side effect is kept from the original
    implementation).

    Args:
        df (pd.DataFrame): Dataframe with price series in a single column.
        span (int): Span for exponential weighting.
    Returns:
        pd.DataFrame: Dataframe containing volatility estimates.
    '''
    # ffill() replaces the deprecated fillna(method='ffill') spelling.
    df.ffill(inplace=True)
    r = df.pct_change()
    return r.ewm(span=span).std()
Example #29
0
 def graph(self, period, portfolio=None, drop_components=False):
     """Plot the rebased series and return statistics of their period returns.

     Every column of ``self.data`` is rebased to 100 at ``self.start``.  When
     ``portfolio`` (a mapping of column -> multiplier) is given, a rebased
     'Portfolio' series is added and, if ``drop_components`` is true, its
     member columns are removed.  The returned frame holds ``describe()`` of
     the ``period`` percentage changes (in percent) plus 'shrp' and
     'drawdown' columns, sorted by 'shrp' in descending order.
     """
     rebased = {}
     for col in self.data.columns:
         rebased[col] = self.data[col] * (100 / self.data[col][self.start])
     if portfolio:
         blended = sum(rebased[st] * sh for st, sh in portfolio.items())
         rebased['Portfolio'] = blended * (100 / blended[self.start])
         if drop_components:
             for st in portfolio:
                 del rebased[st]
     frame = DataFrame(rebased)
     frame.plot(figsize=(12, 8), grid=1)
     stat = (frame.pct_change(period) * 100).describe().T
     # RISK_FREE_RATE is defined outside this block; it is scaled by period / 252.
     excess = stat['mean'] - RISK_FREE_RATE * period / 252
     stat['shrp'] = excess / stat['std']
     stat['drawdown'] = frame.apply(self._max_drawdown)
     return stat.sort_values('shrp', ascending=False)
Example #30
0
    def test_pct_change_numeric(self):
        """``pct_change(fill_method="pad")`` must equal the manual
        forward-fill / shift computation along both axes."""
        # GH#11150
        rows = [np.arange(0, 40, 10) for _ in range(3)]
        pnl = DataFrame(rows).astype(np.float64)
        for row_pos, col_pos, value in [(1, 0, np.nan), (1, 1, np.nan), (2, 3, 60)]:
            pnl.iat[row_pos, col_pos] = value

        for axis in (0, 1):
            filled = pnl.ffill(axis=axis)
            expected = filled / pnl.ffill(axis=axis).shift(axis=axis) - 1
            result = pnl.pct_change(axis=axis, fill_method="pad")

            tm.assert_frame_equal(result, expected)
def PnL(weights: pd.DataFrame, df: pd.DataFrame, returns_data=True):
    '''
    Portfolio profit/loss on each day.

    :param weights: 1 x n DataFrame of portfolio weights, ordered like the
        columns of ``df``
    :param df: returns, or prices when ``returns_data`` is False (they are
        then converted with ``pct_change().dropna()``)
    :param returns_data: whether ``df`` already contains returns
    :return: single-column DataFrame 'PnL' with the weighted sum per row
    :raises ValueError: if ``weights`` is not a DataFrame with exactly one row
    '''
    # Check the type first: reading `.shape` on a non-DataFrame would raise
    # AttributeError before the intended ValueError could be reported.
    if not isinstance(weights, pd.DataFrame) or weights.shape[0] != 1:
        raise ValueError("weights should be a 1 x n DataFrame")
    if not returns_data:
        df = df.pct_change().dropna()
    # Repeat the single weight row once per row of df so the product is
    # element-wise.
    weight_df = pd.DataFrame(weights.values.tolist() * df.shape[0],
                             columns=df.columns,
                             index=df.index)

    pnl = (df * weight_df).sum(axis=1).to_frame().rename(columns={0: 'PnL'})
    return pnl
def realized_volatility(price_df: pd.DataFrame, *vol_lag, annualized_factor: int = 252, allowed_number_na: int = 5) \
        -> pd.DataFrame:
    """Rolling annualized realized volatility, maximised over several windows.

    For every window length in ``vol_lag`` the rolling standard deviation of
    the unfilled percentage changes is computed and scaled by
    ``sqrt(annualized_factor)``.  The element-wise maximum over all windows is
    returned; it is NaN wherever any single window is NaN.

    :param price_df: daily prices, tickers as columns and observation dates
        as index
    :param vol_lag: one or more rolling window lengths, each at least 2
    :param annualized_factor: number of periods per year used for scaling
    :param allowed_number_na: passed to ``rolling`` as ``min_periods``
    :return: DataFrame shaped like ``price_df``
    :raises ValueError: if no window is given or a window is smaller than 2
    """
    if not vol_lag:
        raise ValueError("at least one vol_lag needs to be provided.")
    if min(vol_lag) < 2:
        raise ValueError("vol_lag needs to be an 'int' larger or equal to 2.")

    # The returns do not depend on the window, so compute them once.
    return_df = price_df.pct_change(fill_method=None)
    max_volatility_df = None
    for lag in vol_lag:
        volatility_sub_df = return_df.rolling(window=lag, min_periods=allowed_number_na).std() \
                            * (annualized_factor ** 0.5)
        if max_volatility_df is None:
            max_volatility_df = volatility_sub_df
        else:
            # NaN-propagating element-wise maximum of two identically
            # labelled frames (replaces concat(...).max(level=0, skipna=False),
            # whose `level` argument no longer exists in pandas 2).
            max_volatility_df = np.maximum(max_volatility_df, volatility_sub_df)

    # before price starts publishing, value should be nan regardless of
    # allowed_number_na; prices are padded explicitly so the result does not
    # depend on the default fill behaviour of pct_change
    padded_returns = price_df.ffill().pct_change(fill_method=None).ffill()
    not_yet_published = padded_returns.rolling(window=max(vol_lag)).mean().isnull()
    return max_volatility_df.mask(not_yet_published)
Example #33
0
def aagr(df: pd.DataFrame, window: int=10):  # TODO: don't include the window
    """Rolling mean of the period-over-period growth rate.

    Parameters
    ----------
    df : `DataFrame`
        the values whose percentage change is averaged
    window : `int`
        the rolling window size

    Returns
    -------
    return : `DataFrame`
        The rolling apply result, without rows containing missing values
    """
    growth = df.pct_change()
    averaged = growth.rolling(window).apply(np.mean)
    return averaged.dropna()
Example #34
0
def show_rps(data: pd.DataFrame,
             interval: int = 1,
             start_index=None,
             show=False):
    """Percentage change of ``data`` over ``interval`` rows.

    :param data: input frame; needs a datetime index when ``start_index`` is
        used
    :param interval: number of rows spanned by each change
    :param start_index: if given, only rows at or after this date are kept
    :param show: if true, the result is also plotted with matplotlib
    :return: the (optionally truncated) percentage changes
    """
    rsp_data = data.pct_change(interval)

    if start_index:
        cutoff = pd.to_datetime(start_index)
        rsp_data = rsp_data.loc[rsp_data.index >= cutoff]

    if show:
        plt.plot(rsp_data)
        plt.legend(data.columns, loc="best")
        plt.show()

    return rsp_data
import matplotlib.pyplot as plt 
from random import randint 
now = datetime.datetime.now()

# NOTE(review): `list` shadows the builtin; kept because later parts of the
# script may refer to it.
list = '^GSPC'

# Draw random dates until a valid calendar date comes up (e.g. 31 February is
# rejected with ValueError).  Only ValueError is retried: a bare `except`
# would also swallow NameError/KeyboardInterrupt and loop forever.
start = None
while start is None:
    try:
        start = datetime.datetime(randint(1950,2015), randint(1,12), randint(1,31))
    except ValueError:
        pass
end = datetime.datetime(now.year, now.month, now.day)

# Adjusted close of the index between the random start date and today
df = pd.io.data.get_data_yahoo(list, start, end)['Adj Close']
df = DataFrame(df)
df['Returns'] = df.pct_change()
df['Date'] = df.index
df['Date'] = [time.date() for time in df['Date']]
# Label every row with the name of its weekday
l = df.index.values
for i in range(0,len(l)):
    df.loc[l[i], 'DayoftheWeek'] = datetime.datetime.strptime(str(df.loc[l[i], 'Date']), '%Y-%m-%d').strftime('%A')

# Per-weekday accumulators
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
Monday = 0
MonCount = 0
Mon = []
Tuesday = 0
TueCount = 0
Tue = []
Wednesday = 0
WedCount = 0
Example #36
0
obj.describe()
## Correlation and Covariance
import pandas.io.data as web
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')

# One column per ticker.  (A mistyped duplicate of this statement with a
# stray comma was removed; `.items()` works on Python 2 and 3.)
price = DataFrame({tic: data['Adj Close']
                   for tic, data in all_data.items()})
price
volume = DataFrame({tic: data['Volume']
                    for tic, data in all_data.items()})
# percent changes of the prices:
returns = price.pct_change()
# (a mistyped `returns.tails()` call was removed)
returns.tail()
returns.MSFT.corr(returns.IBM) # correlation of the overlapping non-NA
returns.MSFT.cov(returns.IBM) # covariance of the overlapping non-NA
returns.corr()
returns.cov()
returns.corrwith(returns.IBM)
returns.corrwith(volume)
## Unique values, Value counts, and membership
obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
uniques = obj.unique()
uniques
obj.value_counts() # value frequencies
from pandas import value_counts
Example #37
0
import matplotlib.pyplot as plt
from collections import defaultdict

# Switch matplotlib to interactive mode
plt.interactive(True)

# Tickers downloaded below
names = [
    'AAPL', 'GOOG', 'MSFT', 'DELL',
    'GS', 'MS', 'BAC', 'C',
]


def get_px(stock, start, end):
    """Download ``stock`` between ``start`` and ``end`` via ``web`` and
    return its 'Adj Close' column; a progress line is printed first."""
    print('Get ' + stock)
    history = web.get_data_yahoo(stock, start, end)
    return history['Adj Close']


# One adjusted-close column per ticker
px = DataFrame({n: get_px(n, '1/1/2009', '6/1/2012') for n in names})

# Align to business days and carry the last price forward;
# ffill() replaces the deprecated fillna(method='pad').
px = px.asfreq('B').ffill()
rets = px.pct_change()
# Cumulative return of every ticker
((1 + rets).cumprod() - 1).plot()
print('block')


def calc_mom(price, lookback, lag):
    """Cross-sectionally standardised momentum ranks.

    The ``lookback``-row percentage change of the prices shifted by ``lag``
    rows is ranked across columns (largest change = rank 1); on every row the
    ranks are then demeaned and divided by their standard deviation.

    :param price: price frame, one column per asset
    :param lookback: number of rows over which the change is measured
    :param lag: number of rows the prices are shifted before measuring
    :return: DataFrame shaped like ``price``
    """
    mom_ret = price.shift(lag).pct_change(lookback)
    ranks = mom_ret.rank(axis=1, ascending=False)
    # The row statistics are indexed like the rows, so they must be applied
    # along axis=0; a plain `frame - series` aligns the Series with the
    # columns instead and yields only NaN.
    demeaned = ranks.sub(ranks.mean(axis=1), axis=0)
    return demeaned.div(demeaned.std(axis=1), axis=0)


def compound(x):
    """Compound the simple returns in ``x``: ``prod(1 + x) - 1``."""
    return (1 + x).prod() - 1


def daily_sr(x):
    """Mean of ``x`` divided by its standard deviation."""
    return x.mean() / x.std()

def main():
    """
    Calculation and aggregation of summary statistics.

    Prints a tour of pandas reductions, correlation/covariance helpers and
    value-count utilities.  Every ``print`` takes a single argument, so the
    output is the same on Python 2 and Python 3.
    """

    # Summary of statistics
    # (the reductions return pandas objects, not ndarrays)
    df = DataFrame([[1.4, np.nan],
                    [7.1, -4.5],
                    [np.nan, np.nan],
                    [0.75, -1.3]],
                   index=list('abcd'),
                   columns=['one', 'two'])
    print(df)
    print(df.sum())
    print(df.sum(axis=1))
    print(df.mean(axis=1))  # exclude nan
    print(df.mean(axis=1, skipna=False))
    print(df.idxmin())
    print(df.idxmax())
    print(df.cumsum())
    print(df.describe())
    # values are not numbers
    obj = Series(list('aabc') * 4)
    print(obj.describe())

    methods = ['count', 'min', 'max',  # 'argmin', 'argmax',
               'quantile', 'median', 'mad', 'var', 'std',
               'skew', 'kurt', 'cummin', 'cummax', 'cumprod',
               'diff', 'pct_change']

    for method in methods:
        reducer = getattr(df, method, None)
        if reducer is None:
            # Not every method exists in every pandas version
            # (e.g. DataFrame.mad was removed in pandas 2.0).
            continue
        print(u'「{0}」'.format(method))
        print(reducer())
        print('')

    # Correlation and Covariance
    all_data = {}
    lst = []  # ['AAPL', 'IBM', 'MSFT'] #, 'GOOG']:
    for ticket in lst:  # , 'GOOG']:
        # IOError: after 3 tries, Yahoo! did not return a 200
        # for url 'http://ichart.finance.yahoo.com/table.csv?s=GOOG&a=0&b=1&c=2000&d=0&e=1&f=2010&g=d&ignore=.csv'
        all_data[ticket] = pd.io.data.get_data_yahoo(ticket, '1/1/2000', '1/1/2010')
    price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})
    volume = DataFrame({tic: data['Volume'] for tic, data in all_data.items()})
    if all_data:
        returns = price.pct_change()
        print(returns.tail())
        print('')
        print(returns.MSFT.corr(returns.IBM))
        print(returns.MSFT.cov(returns.IBM))
        print('')
        print(returns.corr())
        print(returns.cov())
        print('')
        print(returns.corrwith(returns.IBM))
        print(returns.corrwith(volume))

    # unique, frequency, belong
    # (same output as the Python 2 statement `print '',''`)
    print(' ')
    obj = Series(list('cadaabbcc'))
    uniques = obj.unique()
    print(uniques)
    print(obj.value_counts())
    # Series.value_counts replaces the deprecated top-level pd.value_counts
    print(Series(obj.values).value_counts(sort=False))
    mask = obj.isin(['b', 'c'])
    print(mask)
    print(obj[mask])

    data = DataFrame({
        'Qu1' : [1,3,4,3,4],
        'Qu2' : [2,3,1,2,3],
        'Qu3' : [1,5,2,4,4],
    })
    print(data)
    print(data.apply(lambda column: column.value_counts()).fillna(0))