Example #1
0
def test_load_balance():
    """Test simfin.bulk.load_balance()"""
    for dataset, variant, market in iter_all_datasets(datasets='balance'):
        kwargs = _create_kwargs(variant=variant, market=market)

        sf.load_balance(**kwargs)
        sf.load_balance_banks(**kwargs)
        sf.load_balance_insurance(**kwargs)
Example #2
0
def get_balance() -> pd.DataFrame:
    """Gets the bulk balance statements from SimFin API

  Downloads the data if you don't already have it
  """
    setup_simfin()

    df = sf.load_balance(variant='quarterly', market='us')
    return df
Example #3
0
    def __init__(self, dir: str = os.getcwd()):
        self._dir = dir

        dotenv_path = os.path.join('.env')
        load_dotenv(dotenv_path)

        sf.set_api_key('d5I8fvwmF29HUbsOwa8l3bUovp6L1NcX')
        sf.set_data_dir(os.path.join(self._dir, 'simfin'))

        self._industries = sf.load_industries()
        self._prices = sf.load_shareprices(refresh_days=0)
        self._balance = sf.load_balance(variant="quarterly")
        self._income = sf.load_income(variant="quarterly")
        self._companies = sf.load_companies()
Example #4
0
    def SF_balance_sheet(self,
                         sec_id: str = 'AAPL',
                         period: str = 'quarterly',
                         market: str = 'us') -> pd.DataFrame:
        """[summary] Load balance sheet data.

        Args:
            sec_id (str, optional): [stock ticker]. Defaults to 'AAPL'.
            period (str, optional): [annual, quarterly]. Defaults to 'quarterly'.
            market (str, optional): [us, de, etc]. Defaults to 'us'.

        Returns:
            pd.DataFrame: 
        """

        df_balance = simfin.load_balance(variant=period, market=market)

        df_balance = df_balance.loc[sec_id]

        return df_balance
Example #5
0
def load_financial_data(path: str):

    sf.set_api_key('free')

    sf.set_data_dir(path)

    # Load the full list of companies in the selected market (United States).
    df_companies = sf.load_companies(market='us')

    # Load all the industries that are available.
    df_industries = sf.load_industries()

    # Load the quarterly Income Statements for all companies in the selected market.
    df_income = sf.load_income(variant='quarterly', market='us')

    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_balance = sf.load_balance(variant='quarterly', market='us')

    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_cashflow = sf.load_cashflow(variant='quarterly', market='us')

    return df_companies, df_industries, df_income, df_balance, df_cashflow
Example #6
0
import pandas as pd
import yfinance as yf

# Set your API-key for downloading data. This key gets the free data.
sf.set_api_key('free')

# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

# NOMBRE EN LA BOLSA
company = 'AAPL'

# Download the data from the SimFin server and load into a Pandas DataFrame.
# annual/quarterly/ttm
BALANCE = sf.load_balance(variant='annual', market='us').loc[company, ]
CASH_FLOW = sf.load_cashflow(variant='annual', market='us').loc[company, ]
INCOME = sf.load_income(variant='annual', market='us').loc[company, ]
#PRICE = sf.load_shareprices(variant='daily', market='us').loc[company, ]
PRICE = yf.download(tickers=f'{company}', period='10y', interval='1mo')
PRICE.reset_index(inplace=True)
PRICE = PRICE[PRICE['Date'].dt.month == 12][['Close', 'Date']]

INCOME['Date'] = INCOME.index.strftime('%m-%Y')
BALANCE['Date'] = BALANCE.index.strftime('%m-%Y')
CASH_FLOW['Date'] = CASH_FLOW.index.strftime('%m-%Y')
PRICE['Date'] = PRICE['Date'].dt.strftime('%m-%Y')

PRICE = PRICE.set_index('Date')
INCOME = INCOME.set_index('Date')
BALANCE = BALANCE.set_index('Date')
Example #7
0
import logging

from data.index_composition import SP500_COMP

#SIMFIN_TOKEN="4r5VV5PXW3FR3MRlAUUsXrNGuvFGpEwZ"

#sf.set_api_key(SIMFIN_TOKEN)

# Set the local directory where data-files are stored.
# The dir will be created if it does not already exist.
sf.set_data_dir('./simfin_data/')

# Load the annual Income Statements for all companies in USA.
# The data is automatically downloaded if you don't have it already.
df_income = sf.load_income(variant='quarterly', market='us')
df_balance = sf.load_balance(variant='quarterly', market='us')
#print(df_income.loc['MSFT'].tail())
print(df_balance.index)

symbols= df_balance.index.levels[0]
print(symbols)


#
# roe ={}
df = pd.DataFrame()
# for sym in ['A', 'AA', 'AAC', 'AAL', 'AAMC', 'AAN', 'AAOI', 'AAON', 'AAP', 'AAPL']:
#     try:
#
#         net_income = df_income.loc[sym]['Net Income']
#         total_equity = df_balance.loc[sym]['Total Equity']
def get_book(marketvalue, data_source=None):
    """
    Get fundamental data from simfin, Finnhub.io or ychart.
    simfin:
    https://github.com/SimFin/simfin-tutorials
    Finnhub:
    https://finnhub.io/docs/api
    https://github.com/Finnhub-Stock-API/finnhub-python
    ychart:
    https://ycharts.com/dashboard/
    """
    if data_source == 'simfin':
        books = {}
        books_miss = []

        final_tickers = [i for i in marketvalue]

        sf.set_data_dir(os.getcwd())
        balance = sf.load_balance(variant='quarterly', market='us')
        required_columns = [
            'Total Assets', 'Total Liabilities', 'Total Equity'
        ]

        for ticker in final_tickers:
            try:
                ticker_book = balance.loc[ticker][required_columns]
                ticker_book.rename(
                    columns={'Total Equity': '{}_book'.format(ticker)})
                books.update({ticker: ticker_book})
            except KeyError:
                books_miss.append(ticker)
        return books, books_miss

    elif data_source == 'finnhub':
        api = 'bta56t748v6oo3au8vi0'
        finnhub_client = finnhub.Client(api_key=api)
        books = {}
        books_miss = []

        if isinstance(marketvalue, dict):
            final_tickers = [i for i in marketvalue]
        elif isinstance(marketvalue, list):
            final_tickers = marketvalue.copy()

        for ticker in final_tickers:
            data = finnhub_client.financials_reported(symbol=ticker,
                                                      freq='quarterly')['data']
            if len(data) != 0:
                books.update({ticker: data})
            else:
                books_miss.append(ticker)
        return books, books_miss

    elif data_source == 'ychart':
        book = {}
        headers = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/85.0.4183.83 Safari/537.36"
        }
        error = {}

        for ticker in marketvalue:
            date_text = []
            asset_text = []
            liabilities_text = []
            equity_text = []
            book_miss = []
            index_error = []
            value_error = []

            try:
                for page in range(1, 3):
                    url = 'https://ycharts.com/financials/{}/balance_sheet/quarterly/'.format(
                        ticker) + str(page)
                    data = requests.get(url, headers=headers)
                    soup = BeautifulSoup(data.text, 'html.parser')
                    if len(soup) != 0:
                        table = soup.findAll('table', {'id': 'report'})[0]
                        # Get date data:
                        date_sub = soup_format(table, 'ychart', 'date')
                        date_text.extend(date_sub)

                        # Get asset data
                        asset_sub = soup_format(table, 'ychart', 'asset')
                        asset_text.extend(asset_sub)

                        # Get liabilities
                        liabilities_sub = soup_format(table, 'ychart',
                                                      'liabilities')
                        liabilities_text.extend(liabilities_sub)

                        # Get equity
                        equity_sub = soup_format(table, 'ychart', 'equity')
                        equity_text.extend(equity_sub)

                    else:
                        book_miss.append(ticker)
                        continue
            except IndexError:
                index_error.append(ticker)

            date_text = pd.Series(date_text)
            asset_text = pd.Series(format_unit(asset_text))
            liabilities_text = pd.Series(format_unit(liabilities_text))
            equity_text = pd.Series(format_unit(equity_text))

            try:
                df = pd.concat(
                    [date_text, asset_text, liabilities_text, equity_text],
                    axis=1)
                df.rename(columns={
                    0: 'date',
                    1: 'Total Assets',
                    2: 'Total Liabilities',
                    3: 'Total Equity'
                },
                          inplace=True)
                df['date'] = df['date'].astype('datetime64[ns]')
                df.set_index('date', inplace=True)
                df.sort_index(axis=0, inplace=True)
                book.update({ticker: df})
            except ValueError:
                value_error.append(ticker)

            error.update({
                'IndexError': index_error,
                'ValueError': value_error
            })

        return book, book_miss, error
Example #9
0
def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):

    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')

    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')

    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')

    if dataset == 'general':

        # Load Data from Simfin
        income_df = sf.load_income(variant='ttm',
                                   market='us',
                                   refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_quarterly_df = sf.load_income(variant='quarterly',
                                             market='us',
                                             refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'general_income.csv')

        balance_df = sf.load_balance(variant='ttm',
                                     market='us',
                                     refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_quarterly_df = sf.load_balance(variant='quarterly',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'general_balance.csv')

        cashflow_df = sf.load_cashflow(variant='ttm',
                                       market='us',
                                       refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_quarterlay_df = sf.load_cashflow(variant='quarterly',
                                                  market='us',
                                                  refresh_days=refresh_days)
        cashflow_quarterlay_df = cashflow_quarterlay_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'general_cashflow.csv')

        derived_df = sf.load_derived(variant='ttm',
                                     market='us',
                                     refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'general_fundamental_derived.csv')

        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }

        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)

        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterlay_df,
            **cache_args)

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns, growth_signal_df.columns, fin_signal_df.
                columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(fin_signal_df).join(
                growth_signal_df).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'general'

    elif dataset == 'banks':

        # Load Data from Simfin
        income_df = sf.load_income_banks(variant='ttm',
                                         market='us',
                                         refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'banks_income.csv')

        balance_df = sf.load_balance_banks(variant='ttm',
                                           market='us',
                                           refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'banks_balance.csv')

        cashflow_df = sf.load_cashflow_banks(variant='ttm',
                                             market='us',
                                             refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'banks_cashflow.csv')

        derived_df = sf.load_derived_banks(variant='ttm',
                                           market='us',
                                           refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'banks'

    elif dataset == 'insurance':

        # Load Data from Simfin
        income_df = sf.load_income_insurance(variant='ttm',
                                             market='us',
                                             refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'insurance_income.csv')

        balance_df = sf.load_balance_insurance(variant='ttm',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'insurance_balance.csv')

        cashflow_df = sf.load_cashflow_insurance(variant='ttm',
                                                 market='us',
                                                 refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'insurance_cashflow.csv')

        derived_df = sf.load_derived_insurance(variant='ttm',
                                               market='us',
                                               refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'insurance_fundamental_derived.csv')

        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]

        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])

        fundamental_df['Dataset'] = 'insurance'

    # Drop Columns with more then 1-thresh nan values
    fundamental_df = fundamental_df.dropna(thresh=int(thresh *
                                                      len(fundamental_df)),
                                           axis=1)

    # Drop Duplicate Index
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(
        keep='first')]

    # Replace Report Date with the Publish Date because the Publish Date is when the Fundamentals are known to the Public
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])

    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)

    # General
    # Clean Up
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ],
                 axis=1)

    if dataset == 'general':
        # Remove Share Prices Over Amazon Share Price
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]

        df = df.dropna(subset=[
            'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'
        ])

        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + fin_signal_df.columns.tolist() + growth_signal_df.columns.tolist(
        ) + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    else:
        df = df.dropna(
            subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue'])

        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)

    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]

    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'], axis=0)

    # Add Company and Industry Information and Categorize
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])

    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]

    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)

    df = encoder.transform(df)

    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])

    return df
Example #10
0
df_signals['Operating margin ratio'] = round(
    (df_negative['Operating Income (Loss)'] / df_negative['Revenue']) * 100, 2)
df_signals['Interest Coverage'] = round(
    (df_negative['Operating Income (Loss)'] /
     df_negative['Interest Expense, Net']), 2)
df_signals['Taxes paid'] = round(
    (df_negative['Income Tax (Expense) Benefit, Net'] /
     df_negative['Pretax Income (Loss)']) * 100, 2)
df_signals['Net income margin'] = round(
    (df_negative['Net Income'] / df_negative['Revenue']) * 100, 2)
#df_signals.replace(0, np.nan, inplace=True)
#df_signals.replace(-0, 0, inplace=True)

df2 = df_signals.loc[ticker]

df_balance = sf.load_balance(variant='annual', market='us', index=[TICKER])
df_balance = df_balance.drop([
    'Currency', 'SimFinId', 'Fiscal Period', 'Publish Date', 'Shares (Basic)',
    'Report Date'
],
                             axis=1)
df_balance = df_balance.fillna(0)
df_balance = df_balance.apply(lambda x: x / 1000000)
decimals = 0
df_balance['Fiscal Year'] = df_balance['Fiscal Year'].apply(
    lambda x: x * 1000000)
df_balance['Fiscal Year'] = df_balance['Fiscal Year'].apply(
    lambda x: round(x, decimals))
df3 = df_balance.loc[ticker]

#external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# TTM Income Statements.
df_income_ttm = sf.load_income(variant='ttm', market=market)
#Contains: Revenue, Shares(Diluted), Revenue Per Share, Gross Profit, 
#          Net Income (Common)
#          Diluted Earnings Per Share(Diluted EPS) = Net Income (Common) / Shares Diluted
#          Operating Margin = Operating Income (Loss) * 100 / Revenue
#          Profit Margin = (Revenue - Cost of Revenue) * 100 / Revenue
#          Net Profit Margin = Net Income * 100 / Revenue

# Quarterly Income Statements.
df_income_qrt = sf.load_income(variant='quarterly', market=market)
#Contains: Qtrly Earnings Growth = amount by with this quarter earnings exceeds 
#                                  the same quarter earnings for past year

# TTM Balance Sheets.
df_balance_ttm = sf.load_balance(variant='ttm', market=market)
#Contains: Total Debt = Short Term Debt + Long Term Debt, 
#          Total Debt/Equity = Total Debt / Total Equity,
#          Total Cash = Cash, Cash Equivalents & Short Term Investments
#          Total Cash Per Share = Total Cash / Shares Diluted
#          Total Assets
#          Current Ratio = Total Current Assets / Total Current Liabilities


# TTM Cash-Flow Statements.
df_cashflow_ttm = sf.load_cashflow(variant='ttm', market=market)
#Contains: Operating Cash Flow = Net Cash from Operating Activities
#Contains: Free Clash Flow = Net Cash from Operating Activities + Change in Fixed Assets & Intangibles

# Daily Share-Prices.
df_prices = sf.load_shareprices(variant='daily', market=market)