Ejemplo n.º 1
0
def load_index_data(ticker):
    """
    Assemble one Pandas DataFrame for a stock-index out of several
    separate data-files.

    Sources:

    - The share-price comes from a Yahoo-file.
    - Dividend Per Share, Sales Per Share and Book-Value Per Share
      each come from their own text-file in the data-directory.

    Derived columns:

    - Total Return, computed from the share-price and the dividend.
    - P/Sales and P/Book, computed for every row that has a share-price.

    A caveat about the Total Return: dividend-data for stock indices
    is usually reported quarterly, whereas the individual companies
    pay their dividends on different days within the quarter. Here
    the dividend is treated as being paid out and reinvested quarterly,
    which probably introduces a small error. An alternative would be
    to spread each quarterly dividend evenly over the days of the
    quarter and reinvest those small portions daily. Whether that
    actually gives a smaller estimation error has not been tested.

    :param ticker:
        Name of the stock-index used in the filenames e.g. "S&P 500"
    :return: Pandas DataFrame with the data.
    """

    def _path_for(suffix):
        # All the per-share files are named "<ticker><suffix>" and
        # are located in the data-directory.
        return os.path.join(data_dir, ticker + suffix)

    # Resolve all file-paths before anything is read from disk.
    dividend_path = _path_for(" Dividend Per Share.txt")
    sales_path = _path_for(" Sales Per Share.txt")
    book_value_path = _path_for(" Book-Value Per Share.txt")

    # Read the raw data: price first, then the per-share figures.
    prices = _load_price_yahoo(ticker=ticker)
    dividends = _load_data(path=dividend_path)
    sales = _load_data(path=sales_path)
    book_values = _load_data(path=book_value_path)

    # Put price and dividend side by side, then discard every row
    # that has no share-price.
    df = pd.concat([prices, dividends], axis=1)
    df.dropna(subset=[SHARE_PRICE], inplace=True)

    # Yahoo's price-data for stock indices does not reinvest dividends,
    # so the Total Return has to be calculated here.
    df[TOTAL_RETURN] = total_return(df=df)

    # Financial data, resampled to daily data-points. Assigning to
    # the DataFrame aligns the values on the DataFrame's index.
    per_share_columns = (
        (SALES_PER_SHARE, sales),
        (BOOK_VALUE_PER_SHARE, book_values),
    )
    for column, series in per_share_columns:
        df[column] = _resample_daily(series)

    # Valuation ratios: share-price divided by the per-share figure.
    # These are added after both per-share columns so the column
    # order of the result stays the same.
    ratio_columns = (
        (PSALES, SALES_PER_SHARE),
        (PBOOK, BOOK_VALUE_PER_SHARE),
    )
    for ratio, denominator in ratio_columns:
        df[ratio] = df[SHARE_PRICE] / df[denominator]

    return df
Ejemplo n.º 2
0
def load_index_data(ticker, sales=True, book_value=True, dividend_TTM=True):
    """
    Assemble one Pandas DataFrame for a stock-index out of several
    separate data-files.

    The share-price is read from a Yahoo-file and the Dividend Per
    Share from its own text-file. These two are always loaded and
    are used to produce the Total Return. The remaining data-files
    (Sales Per Share, Book-Value Per Share, Dividend Per Share TTM)
    are optional and are loaded by dedicated helper-functions.

    A caveat about the Total Return: dividend-data for stock indices
    is often reported quarterly, whereas the individual companies
    pay their dividends on different days within the quarter. Here
    the dividend is treated as being paid out and reinvested quarterly,
    which probably introduces a small error. An alternative would be
    to spread each quarterly dividend evenly over the days of the
    quarter and reinvest those small portions daily. Whether that
    actually gives a smaller estimation error has not been tested.

    :param ticker:
        Name of the stock-index used in the filenames e.g. "S&P 500"

    :param sales:
        Boolean whether to load data-file for Sales Per Share.

    :param book_value:
        Boolean whether to load data-file for Book-Value Per Share.

    :param dividend_TTM:
        Boolean whether to load data-file for Dividend Per Share TTM.

    :return:
        Pandas DataFrame with the data.
    """

    # Mandatory inputs: the daily share-price and the dividend.
    prices = _load_price_yahoo(ticker=ticker)
    dividend_path = os.path.join(data_dir, ticker + " Dividend Per Share.txt")
    dividends = _load_data(path=dividend_path)

    # Put price and dividend side by side, then discard every row
    # that has no share-price.
    df = pd.concat([prices, dividends], axis=1)
    df.dropna(subset=[SHARE_PRICE], inplace=True)

    # Yahoo's price-data for stock indices does not reinvest dividends,
    # so the Total Return has to be calculated here.
    df[TOTAL_RETURN] = returns.total_return(df=df)

    # Optional data-files, processed in a fixed order. Each helper is
    # given the DataFrame and its return-value is not used, so it is
    # presumably expected to add its columns to the DataFrame in-place.
    optional_loaders = (
        (sales, _load_sales_per_share),
        (book_value, _load_book_value_per_share),
        (dividend_TTM, _load_dividend_TTM),
    )
    for requested, loader in optional_loaders:
        if requested:
            loader(ticker=ticker, df=df)

    return df