def test_load_balance():
    """Smoke-test simfin.bulk.load_balance() and its banks/insurance variants."""
    loaders = (sf.load_balance, sf.load_balance_banks, sf.load_balance_insurance)
    for dataset, variant, market in iter_all_datasets(datasets='balance'):
        call_kwargs = _create_kwargs(variant=variant, market=market)
        for loader in loaders:
            loader(**call_kwargs)
def get_balance() -> pd.DataFrame:
    """Return the bulk quarterly US balance statements from the SimFin API.

    The data is downloaded first if it is not already cached locally.
    """
    setup_simfin()
    return sf.load_balance(variant='quarterly', market='us')
def __init__(self, dir: str = None):
    """Initialize SimFin data access rooted at *dir*.

    Args:
        dir: Base directory for the local SimFin cache. Defaults to the
            current working directory at call time. (NOTE: the original
            default ``os.getcwd()`` was evaluated once at class-definition
            time, freezing whatever directory the import happened in.
            The parameter name shadows the ``dir`` builtin but is kept
            for interface compatibility.)
    """
    self._dir = dir if dir is not None else os.getcwd()
    # '.env' needs no os.path.join; load_dotenv resolves it relative to cwd.
    load_dotenv('.env')
    # SECURITY: the API key was hard-coded in source. Prefer the
    # environment (populated by .env above); the literal remains only as a
    # backward-compatible fallback. Rotate this key and remove it.
    sf.set_api_key(os.environ.get('SIMFIN_API_KEY',
                                  'd5I8fvwmF29HUbsOwa8l3bUovp6L1NcX'))
    sf.set_data_dir(os.path.join(self._dir, 'simfin'))
    # Bulk datasets; refresh_days=0 forces a fresh share-price download.
    self._industries = sf.load_industries()
    self._prices = sf.load_shareprices(refresh_days=0)
    self._balance = sf.load_balance(variant="quarterly")
    self._income = sf.load_income(variant="quarterly")
    self._companies = sf.load_companies()
def SF_balance_sheet(self, sec_id: str = 'AAPL', period: str = 'quarterly', market: str = 'us') -> pd.DataFrame:
    """Load balance-sheet data for one ticker from SimFin.

    Args:
        sec_id (str, optional): Stock ticker. Defaults to 'AAPL'.
        period (str, optional): 'annual' or 'quarterly'. Defaults to 'quarterly'.
        market (str, optional): Market code ('us', 'de', ...). Defaults to 'us'.

    Returns:
        pd.DataFrame: Balance-sheet rows for *sec_id*.
    """
    all_balances = simfin.load_balance(variant=period, market=market)
    return all_balances.loc[sec_id]
def load_financial_data(path: str):
    """Download/load the SimFin bulk 'us' datasets into *path*.

    Uses the free API key; data is cached under *path* and only
    re-downloaded when stale.

    Returns:
        Tuple of (companies, industries, income, balance, cashflow)
        DataFrames; statements are the quarterly variants.
    """
    sf.set_api_key('free')
    sf.set_data_dir(path)
    # Load the full list of companies in the selected market (United States).
    df_companies = sf.load_companies(market='us')
    # Load all the industries that are available.
    df_industries = sf.load_industries()
    # Load the quarterly Income Statements for all companies in the selected market.
    df_income = sf.load_income(variant='quarterly', market='us')
    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_balance = sf.load_balance(variant='quarterly', market='us')
    # Load the quarterly Cash-Flow data for all companies in the selected market.
    df_cashflow = sf.load_cashflow(variant='quarterly', market='us')
    return df_companies, df_industries, df_income, df_balance, df_cashflow
import pandas as pd
import yfinance as yf

# Set your API-key for downloading data. This key gets the free data.
# NOTE(review): `sf` (simfin) is not imported in this snippet — presumably
# imported elsewhere in the file; verify before running standalone.
sf.set_api_key('free')
# Set the local directory where data-files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')
# Ticker symbol on the exchange.
company = 'AAPL'
# Download the data from the SimFin server and load into a Pandas DataFrame.
# annual/quarterly/ttm
BALANCE = sf.load_balance(variant='annual', market='us').loc[company, ]
CASH_FLOW = sf.load_cashflow(variant='annual', market='us').loc[company, ]
INCOME = sf.load_income(variant='annual', market='us').loc[company, ]
#PRICE = sf.load_shareprices(variant='daily', market='us').loc[company, ]
# Prices come from Yahoo Finance instead: 10 years of monthly bars.
PRICE = yf.download(tickers=f'{company}', period='10y', interval='1mo')
PRICE.reset_index(inplace=True)
# Keep only the December bar of each year as the annual closing price.
PRICE = PRICE[PRICE['Date'].dt.month == 12][['Close', 'Date']]
# Re-key every frame on a common 'MM-YYYY' date string so they can be
# aligned against each other.
INCOME['Date'] = INCOME.index.strftime('%m-%Y')
BALANCE['Date'] = BALANCE.index.strftime('%m-%Y')
CASH_FLOW['Date'] = CASH_FLOW.index.strftime('%m-%Y')
PRICE['Date'] = PRICE['Date'].dt.strftime('%m-%Y')
PRICE = PRICE.set_index('Date')
INCOME = INCOME.set_index('Date')
BALANCE = BALANCE.set_index('Date')
import logging
from data.index_composition import SP500_COMP

#SIMFIN_TOKEN="4r5VV5PXW3FR3MRlAUUsXrNGuvFGpEwZ"
#sf.set_api_key(SIMFIN_TOKEN)
# Set the local directory where data-files are stored.
# The dir will be created if it does not already exist.
sf.set_data_dir('./simfin_data/')
# Load the quarterly Income Statements for all companies in USA.
# The data is automatically downloaded if you don't have it already.
df_income = sf.load_income(variant='quarterly', market='us')
df_balance = sf.load_balance(variant='quarterly', market='us')
#print(df_income.loc['MSFT'].tail())
# The balance sheet is indexed by a (Ticker, Report Date) MultiIndex;
# level 0 of the index is the list of available ticker symbols.
print(df_balance.index)
symbols= df_balance.index.levels[0]
print(symbols)
# # roe ={}
df = pd.DataFrame()
# for sym in ['A', 'AA', 'AAC', 'AAL', 'AAMC', 'AAN', 'AAOI', 'AAON', 'AAP', 'AAPL']:
#     try:
#         # net_income = df_income.loc[sym]['Net Income']
#         total_equity = df_balance.loc[sym]['Total Equity']
def get_book(marketvalue, data_source=None):
    """
    Get fundamental data from simfin, Finnhub.io or ychart.

    simfin: https://github.com/SimFin/simfin-tutorials
    Finnhub: https://finnhub.io/docs/api
             https://github.com/Finnhub-Stock-API/finnhub-python
    ychart: https://ycharts.com/dashboard/

    Args:
        marketvalue: dict keyed by ticker, or list of tickers.
        data_source: 'simfin', 'finnhub' or 'ychart'. Any other value
            returns None (unchanged original behavior).

    Returns:
        'simfin'/'finnhub': tuple (books, books_miss).
        'ychart': tuple (book, book_miss, error).
    """
    if data_source == 'simfin':
        books = {}
        books_miss = []
        final_tickers = [i for i in marketvalue]
        sf.set_data_dir(os.getcwd())
        balance = sf.load_balance(variant='quarterly', market='us')
        required_columns = [
            'Total Assets', 'Total Liabilities', 'Total Equity'
        ]
        for ticker in final_tickers:
            try:
                ticker_book = balance.loc[ticker][required_columns]
                # BUGFIX: DataFrame.rename returns a new frame; the original
                # discarded the result, so the column was never renamed.
                ticker_book = ticker_book.rename(
                    columns={'Total Equity': '{}_book'.format(ticker)})
                books.update({ticker: ticker_book})
            except KeyError:
                books_miss.append(ticker)
        return books, books_miss
    elif data_source == 'finnhub':
        # SECURITY NOTE(review): API key hard-coded in source — rotate it and
        # load from an environment variable instead.
        api = 'bta56t748v6oo3au8vi0'
        finnhub_client = finnhub.Client(api_key=api)
        books = {}
        books_miss = []
        if isinstance(marketvalue, dict):
            final_tickers = [i for i in marketvalue]
        elif isinstance(marketvalue, list):
            final_tickers = marketvalue.copy()
        else:
            # BUGFIX: `final_tickers` was unbound for any other input type,
            # raising UnboundLocalError at the loop below.
            final_tickers = []
        for ticker in final_tickers:
            data = finnhub_client.financials_reported(
                symbol=ticker, freq='quarterly')['data']
            if len(data) != 0:
                books.update({ticker: data})
            else:
                books_miss.append(ticker)
        return books, books_miss
    elif data_source == 'ychart':
        book = {}
        headers = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/85.0.4183.83 Safari/537.36"
        }
        error = {}
        # BUGFIX: these accumulators were re-created on every loop iteration,
        # so only the last ticker's misses/errors survived — and `book_miss`
        # was unbound in the return when `marketvalue` was empty.
        book_miss = []
        index_error = []
        value_error = []
        for ticker in marketvalue:
            # Per-ticker scratch lists for the scraped table columns.
            date_text = []
            asset_text = []
            liabilities_text = []
            equity_text = []
            try:
                # Scrape the first two pages of the quarterly balance sheet.
                for page in range(1, 3):
                    url = 'https://ycharts.com/financials/{}/balance_sheet/quarterly/'.format(
                        ticker) + str(page)
                    data = requests.get(url, headers=headers)
                    soup = BeautifulSoup(data.text, 'html.parser')
                    if len(soup) != 0:
                        table = soup.findAll('table', {'id': 'report'})[0]
                        # Get date data:
                        date_sub = soup_format(table, 'ychart', 'date')
                        date_text.extend(date_sub)
                        # Get asset data
                        asset_sub = soup_format(table, 'ychart', 'asset')
                        asset_text.extend(asset_sub)
                        # Get liabilities
                        liabilities_sub = soup_format(table, 'ychart',
                                                      'liabilities')
                        liabilities_text.extend(liabilities_sub)
                        # Get equity
                        equity_sub = soup_format(table, 'ychart', 'equity')
                        equity_text.extend(equity_sub)
                    else:
                        book_miss.append(ticker)
                        continue
            except IndexError:
                index_error.append(ticker)
            date_text = pd.Series(date_text)
            asset_text = pd.Series(format_unit(asset_text))
            liabilities_text = pd.Series(format_unit(liabilities_text))
            equity_text = pd.Series(format_unit(equity_text))
            try:
                df = pd.concat(
                    [date_text, asset_text, liabilities_text, equity_text],
                    axis=1)
                df.rename(columns={
                    0: 'date',
                    1: 'Total Assets',
                    2: 'Total Liabilities',
                    3: 'Total Equity'
                }, inplace=True)
                df['date'] = df['date'].astype('datetime64[ns]')
                df.set_index('date', inplace=True)
                df.sort_index(axis=0, inplace=True)
                book.update({ticker: df})
            except ValueError:
                value_error.append(ticker)
        # Record errors once, after all tickers have been processed.
        error.update({
            'IndexError': index_error,
            'ValueError': value_error
        })
        return book, book_miss, error
def load_dataset(refresh_days=1, dataset='general', thresh=0.7,
                 simfin_api_key='free', simfin_directory='simfin_data/',
                 data_directory=DATA_DIR, shareprices_df=''):
    """Build a per-share fundamental dataset from SimFin bulk data.

    Loads TTM (and, for 'general', quarterly) statements for the chosen
    dataset ('general', 'banks' or 'insurance'), snapshots each raw frame
    to CSV under *data_directory*, merges income/balance/cashflow/derived
    (plus signal frames for 'general') into one fundamentals frame,
    re-indexes it onto *shareprices_df* by publish date, converts most
    columns to per-share values, joins company/industry info, and
    ordinal-encodes categorical columns.

    Args:
        refresh_days: Max age in days before statement data is re-downloaded.
        dataset: 'general', 'banks' or 'insurance'.
        thresh: Columns with more than (1 - thresh) NaN fraction are dropped.
        simfin_api_key: SimFin API key ('free' for the free tier).
        simfin_directory: Local SimFin cache directory.
        data_directory: Path-like; CSV snapshots are written here.
        shareprices_df: Share-price frame the fundamentals are re-indexed to.

    Returns:
        DataFrame indexed by (Ticker, Date) with per-share fundamentals,
        prices, and encoded company/industry features.
    """
    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)
    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')
    # NOTE(review): these two loads hard-code refresh_days=1 instead of
    # using the `refresh_days` parameter — confirm whether intentional.
    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')
    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')
    if dataset == 'general':
        # Load Data from Simfin
        income_df = sf.load_income(variant='ttm', market='us',
                                   refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_quarterly_df = sf.load_income(variant='quarterly', market='us',
                                             refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'general_income.csv')
        balance_df = sf.load_balance(variant='ttm', market='us',
                                     refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_quarterly_df = sf.load_balance(variant='quarterly',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'general_balance.csv')
        cashflow_df = sf.load_cashflow(variant='ttm', market='us',
                                       refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        # NOTE(review): 'quarterlay' is a typo for 'quarterly' — kept to
        # avoid touching code in a docs-only pass.
        cashflow_quarterlay_df = sf.load_cashflow(variant='quarterly',
                                                  market='us',
                                                  refresh_days=refresh_days)
        cashflow_quarterlay_df = cashflow_quarterlay_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'general_cashflow.csv')
        derived_df = sf.load_derived(variant='ttm', market='us',
                                     refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'general_fundamental_derived.csv')
        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }
        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)
        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterlay_df,
            **cache_args)
        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns, growth_signal_df.columns, fin_signal_df.
                columns))]
        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(fin_signal_df).join(
                growth_signal_df).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'general'
    elif dataset == 'banks':
        # Load Data from Simfin
        income_df = sf.load_income_banks(variant='ttm', market='us',
                                         refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'banks_income.csv')
        balance_df = sf.load_balance_banks(variant='ttm', market='us',
                                           refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'banks_balance.csv')
        cashflow_df = sf.load_cashflow_banks(variant='ttm', market='us',
                                             refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'banks_cashflow.csv')
        derived_df = sf.load_derived_banks(variant='ttm', market='us',
                                           refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')
        # NOTE(review): the line below duplicates the write above —
        # harmless but redundant; kept in this docs-only pass.
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')
        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]
        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'banks'
    elif dataset == 'insurance':
        # Load Data from Simfin
        income_df = sf.load_income_insurance(variant='ttm', market='us',
                                             refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'insurance_income.csv')
        balance_df = sf.load_balance_insurance(variant='ttm', market='us',
                                               refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'insurance_balance.csv')
        cashflow_df = sf.load_cashflow_insurance(variant='ttm', market='us',
                                                 refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'insurance_cashflow.csv')
        derived_df = sf.load_derived_insurance(variant='ttm', market='us',
                                               refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'insurance_fundamental_derived.csv')
        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]
        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'insurance'
    # Drop Columns with more then 1-thresh nan values
    fundamental_df = fundamental_df.dropna(thresh=int(thresh *
                                                      len(fundamental_df)),
                                           axis=1)
    # Drop Duplicate Index
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(
        keep='first')]
    # Replace Report Date with the Publish Date because the Publish Date is when the Fundamentals are known to the Public
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])
    # Forward-fill fundamentals onto the daily share-price index so each
    # trading day carries the most recently published statement.
    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)
    # General
    # Clean Up
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ], axis=1)
    if dataset == 'general':
        # Remove Share Prices Over Amazon Share Price
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]
        df = df.dropna(subset=[
            'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'
        ])
        # Columns that should NOT be divided by diluted share count.
        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + fin_signal_df.columns.tolist() + growth_signal_df.columns.tolist(
        ) + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()
    else:
        df = df.dropna(
            subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue'])
        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()
    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)
    # Convert all remaining value columns to per-share figures.
    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]
    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'],
                                                axis=0)
    # Add Company and Industry Information and Categorize
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])
    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]
    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)
    df = encoder.transform(df)
    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])
    return df
# Ratio signals derived from the (sign-adjusted) income-statement frame.
# NOTE(review): `df_signals`, `df_negative`, `ticker` and `TICKER` are
# defined earlier in the file (not visible in this chunk).
df_signals['Operating margin ratio'] = round(
    (df_negative['Operating Income (Loss)'] / df_negative['Revenue']) * 100,
    2)
df_signals['Interest Coverage'] = round(
    (df_negative['Operating Income (Loss)'] /
     df_negative['Interest Expense, Net']), 2)
df_signals['Taxes paid'] = round(
    (df_negative['Income Tax (Expense) Benefit, Net'] /
     df_negative['Pretax Income (Loss)']) * 100, 2)
df_signals['Net income margin'] = round(
    (df_negative['Net Income'] / df_negative['Revenue']) * 100, 2)
#df_signals.replace(0, np.nan, inplace=True)
#df_signals.replace(-0, 0, inplace=True)
df2 = df_signals.loc[ticker]
# Annual balance sheets, keyed by ticker only; drop metadata columns.
df_balance = sf.load_balance(variant='annual', market='us', index=[TICKER])
df_balance = df_balance.drop([
    'Currency', 'SimFinId', 'Fiscal Period', 'Publish Date', 'Shares (Basic)',
    'Report Date'
], axis=1)
df_balance = df_balance.fillna(0)
# Scale all values to millions ...
df_balance = df_balance.apply(lambda x: x / 1000000)
decimals = 0
# ... then undo the scaling for 'Fiscal Year' (it is a year, not a dollar
# amount) and round it back to an integer.
df_balance['Fiscal Year'] = df_balance['Fiscal Year'].apply(
    lambda x: x * 1000000)
df_balance['Fiscal Year'] = df_balance['Fiscal Year'].apply(
    lambda x: round(x, decimals))
df3 = df_balance.loc[ticker]
#external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# TTM Income Statements. df_income_ttm = sf.load_income(variant='ttm', market=market) #Contains: Revenue, Shares(Diluted), Revenue Per Share, Gross Profit, # Net Income (Common) # Diluted Earnings Per Share(Diluted EPS) = Net Income (Common) / Shares Diluted # Operating Margin = Operating Income (Loss) * 100 / Revenue # Profit Margin = (Revenue - Cost of Revenue) * 100 / Revenue # Net Profit Margin = Net Income * 100 / Revenue # Quarterly Income Statements. df_income_qrt = sf.load_income(variant='quarterly', market=market) #Contains: Qtrly Earnings Growth = amount by with this quarter earnings exceeds # the same quarter earnings for past year # TTM Balance Sheets. df_balance_ttm = sf.load_balance(variant='ttm', market=market) #Contains: Total Debt = Short Term Debt + Long Term Debt, # Total Debt/Equity = Total Debt / Total Equity, # Total Cash = Cash, Cash Equivalents & Short Term Investments # Total Cash Per Share = Total Cash / Shares Diluted # Total Assets # Current Ratio = Total Current Assets / Total Current Liabilities # TTM Cash-Flow Statements. df_cashflow_ttm = sf.load_cashflow(variant='ttm', market=market) #Contains: Operating Cash Flow = Net Cash from Operating Activities #Contains: Free Clash Flow = Net Cash from Operating Activities + Change in Fixed Assets & Intangibles # Daily Share-Prices. df_prices = sf.load_shareprices(variant='daily', market=market)