def test_load_cashflow():
    """Smoke-test the cash-flow loaders of simfin.bulk.

    For every (dataset, variant, market) combination produced by
    ``iter_all_datasets(datasets='cashflow')`` the general, banks and
    insurance cash-flow loaders are each called once with the same keyword
    arguments. Nothing is asserted on the results; the test passes as long
    as no call raises.
    """
    loader_names = (
        'load_cashflow',
        'load_cashflow_banks',
        'load_cashflow_insurance',
    )
    for _dataset, variant, market in iter_all_datasets(datasets='cashflow'):
        load_args = _create_kwargs(variant=variant, market=market)
        # Resolve each loader right before calling it, in the same order
        # as the three explicit calls this loop replaces.
        for loader_name in loader_names:
            getattr(sf, loader_name)(**load_args)
def get_cashflow() -> pd.DataFrame:
    """Return the bulk quarterly US cash-flow statements from SimFin.

    ``setup_simfin()`` is run first, then the dataset is loaded through
    ``sf.load_cashflow``. The data is downloaded if it is not already
    available locally.
    """
    setup_simfin()
    return sf.load_cashflow(variant='quarterly', market='us')
def SF_cashflow_statment(self,
                         sec_id: str = 'AAPL',
                         period: str = 'quarterly',
                         market: str = 'us') -> pd.DataFrame:
    """Load the cash-flow statement of a single security.

    The full cash-flow dataset for the requested period and market is
    loaded through SimFin and then narrowed to ``sec_id`` with ``.loc``.

    Args:
        sec_id (str, optional): Stock ticker to select. Defaults to 'AAPL'.
        period (str, optional): Forwarded to SimFin as ``variant``
            (annual, quarterly). Defaults to 'quarterly'.
        market (str, optional): Market code (us, de, etc). Defaults to 'us'.

    Returns:
        pd.DataFrame: The rows of the cash-flow dataset selected by
            ``sec_id``.
    """
    all_statements = simfin.load_cashflow(variant=period, market=market)
    return all_statements.loc[sec_id]
def load_financial_data(path: str):
    """Load the SimFin reference data and quarterly US fundamentals.

    Configures SimFin with the free API key and ``path`` as its data
    directory, then loads five datasets.

    Args:
        path: Directory handed to ``sf.set_data_dir``.

    Returns:
        A 5-tuple ``(companies, industries, income, balance, cashflow)``:
        the US company list, the industry list, and the quarterly US
        income statements, balance sheets and cash-flow statements.
    """
    sf.set_api_key('free')
    sf.set_data_dir(path)

    # All three fundamentals datasets use the same variant and market.
    quarterly_us = {'variant': 'quarterly', 'market': 'us'}

    # Reference data: companies of the US market and all industries.
    companies = sf.load_companies(market='us')
    industries = sf.load_industries()

    # Quarterly fundamentals for all companies in the US market.
    income = sf.load_income(**quarterly_us)
    balance = sf.load_balance(**quarterly_us)
    cashflow = sf.load_cashflow(**quarterly_us)

    return companies, industries, income, balance, cashflow
import yfinance as yf

# API key used for downloading data; 'free' gives access to the free data.
sf.set_api_key('free')

# Local directory where the data files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

# Ticker symbol of the company on the stock exchange.
company = 'AAPL'

# Annual fundamentals of the US market, narrowed to the selected company.
# Other variants: quarterly / ttm.
BALANCE = sf.load_balance(variant='annual', market='us').loc[company, ]
CASH_FLOW = sf.load_cashflow(variant='annual', market='us').loc[company, ]
INCOME = sf.load_income(variant='annual', market='us').loc[company, ]

# Monthly prices for the last ten years come from yfinance rather than from
# sf.load_shareprices(variant='daily', market='us').
PRICE = yf.download(tickers=company, period='10y', interval='1mo').reset_index()

# Keep only the rows dated in December, with just the close and the date.
PRICE = PRICE[PRICE['Date'].dt.month == 12][['Close', 'Date']]

# Re-key every frame by a 'MM-YYYY' label so they share the same index format.
PRICE['Date'] = PRICE['Date'].dt.strftime('%m-%Y')
PRICE = PRICE.set_index('Date')

INCOME['Date'] = INCOME.index.strftime('%m-%Y')
INCOME = INCOME.set_index('Date')

BALANCE['Date'] = BALANCE.index.strftime('%m-%Y')
BALANCE = BALANCE.set_index('Date')

CASH_FLOW['Date'] = CASH_FLOW.index.strftime('%m-%Y')
CASH_FLOW = CASH_FLOW.set_index('Date')
def _load_sorted_us(loader, variant, refresh_days):
    """Load a US-market SimFin dataset and sort it by ticker, then report date."""
    frame = loader(variant=variant, market='us', refresh_days=refresh_days)
    return frame.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])


def _write_latest_per_ticker(frame, csv_path):
    """Write the last row of every ticker group of ``frame`` to ``csv_path``."""
    frame.groupby('Ticker').last().to_csv(csv_path)


def _columns_missing_from(frame, *others):
    """Return the columns of ``frame`` that none of ``others`` already has."""
    taken = set().union(*(other.columns for other in others))
    return frame.columns[~frame.columns.isin(taken)]


def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):
    """Build the per-share fundamentals dataset aligned to share prices.

    The function configures SimFin, exports several CSV snapshots to
    ``data_directory``, merges the TTM income, balance, cash-flow and derived
    datasets (plus financial and growth signals for ``'general'``), reindexes
    the result onto ``shareprices_df`` with forward fill, divides the
    fundamental columns by the diluted share count, attaches company and
    industry information and ordinal-encodes the object-typed columns.

    Args:
        refresh_days: Forwarded as ``refresh_days`` to the fundamentals
            loaders and as ``cache_refresh`` to the signal functions.
        dataset: One of ``'general'``, ``'banks'`` or ``'insurance'``.
        thresh: Minimum fraction of non-NaN rows a column needs in order to
            be kept in the merged fundamentals.
        simfin_api_key: API key handed to ``sf.set_api_key``.
        simfin_directory: Directory handed to ``sf.set_data_dir``.
        data_directory: Directory the CSV snapshots are written to; it is
            combined with file names using the ``/`` operator.
        shareprices_df: Share-price frame used as reindex target and joined
            onto the result; it must provide a ``'Close'`` column.
            NOTE(review): the ``''`` default is only a placeholder and is
            presumably rejected by ``sf.reindex`` / ``DataFrame.join``, so
            callers have to pass a real frame.

    Returns:
        The combined frame, sorted by ``Ticker`` and ``Date``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # SimFin settings.
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    # Reference data is exported unchanged.
    # NOTE(review): these loads use a fixed refresh of one day (or the
    # library default) instead of ``refresh_days`` -- confirm this is intended.
    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')
    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')
    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')

    # Checked only after the reference CSVs are written, so the files
    # produced for an unsupported name stay the same as before; only the
    # error changes from an unbound-variable failure to a ValueError.
    if dataset not in ('general', 'banks', 'insurance'):
        raise ValueError(
            f"Unknown dataset {dataset!r}; expected 'general', 'banks' or "
            "'insurance'.")

    # The banks/insurance loaders carry the dataset name as a suffix,
    # e.g. ``sf.load_income_banks``; the general ones have no suffix.
    suffix = '' if dataset == 'general' else f'_{dataset}'

    # TTM fundamentals, each with a latest-row-per-ticker CSV snapshot.
    income_df = _load_sorted_us(
        getattr(sf, f'load_income{suffix}'), 'ttm', refresh_days)
    _write_latest_per_ticker(income_df,
                             data_directory / f'{dataset}_income.csv')

    balance_df = _load_sorted_us(
        getattr(sf, f'load_balance{suffix}'), 'ttm', refresh_days)
    _write_latest_per_ticker(balance_df,
                             data_directory / f'{dataset}_balance.csv')

    cashflow_df = _load_sorted_us(
        getattr(sf, f'load_cashflow{suffix}'), 'ttm', refresh_days)
    _write_latest_per_ticker(cashflow_df,
                             data_directory / f'{dataset}_cashflow.csv')

    derived_df = _load_sorted_us(
        getattr(sf, f'load_derived{suffix}'), 'ttm', refresh_days)
    _write_latest_per_ticker(
        derived_df, data_directory / f'{dataset}_fundamental_derived.csv')

    # Financial and growth signals are only computed for the general dataset.
    signal_frames = []
    if dataset == 'general':
        income_quarterly_df = _load_sorted_us(
            sf.load_income, 'quarterly', refresh_days)
        balance_quarterly_df = _load_sorted_us(
            sf.load_balance, 'quarterly', refresh_days)
        cashflow_quarterly_df = _load_sorted_us(
            sf.load_cashflow, 'quarterly', refresh_days)

        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }
        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)
        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterly_df,
            **cache_args)
        signal_frames = [fin_signal_df, growth_signal_df]

    # Remove columns that already exist in other fundamental frames.
    balance_columns = _columns_missing_from(balance_df, income_df)
    cashflow_columns = _columns_missing_from(cashflow_df, income_df)
    derived_df_columns = _columns_missing_from(derived_df, income_df,
                                               *signal_frames)

    # Merge the fundamental data into a single frame; the signal frames
    # (if any) go in before the derived columns.
    fundamental_df = income_df.join(balance_df[balance_columns]).join(
        cashflow_df[cashflow_columns])
    for signal_df in signal_frames:
        fundamental_df = fundamental_df.join(signal_df)
    fundamental_df = fundamental_df.join(derived_df[derived_df_columns])
    fundamental_df['Dataset'] = dataset

    # Keep only columns with at least ``thresh * rows`` non-NaN values.
    fundamental_df = fundamental_df.dropna(
        thresh=int(thresh * len(fundamental_df)), axis=1)

    # Drop duplicated index entries, keeping the first occurrence.
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(
        keep='first')]

    # Index by publish date instead of report date, because the publish date
    # is when the fundamentals become known to the public.
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])

    # Forward-fill the fundamentals onto the share-price index and attach
    # the share prices themselves.
    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)

    # Clean up bookkeeping columns.
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ], axis=1)

    required_cols = ['Shares (Basic)', 'Shares (Diluted)', 'Revenue']
    if dataset == 'general':
        # Remove rows whose close is above the highest AMZN close.
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]
        required_cols.append('Earnings Growth')
    df = df.dropna(subset=required_cols)

    # Columns that must not be divided by the share count: identifiers,
    # share counts, the price, the signals and most derived columns.
    signal_cols = [
        col for signal_df in signal_frames
        for col in signal_df.columns.tolist()
    ]
    non_per_share_cols = [
        'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
        'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
        'Dataset'
    ] + signal_cols + derived_df_columns.difference(
        ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)
    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]
    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'],
                                                axis=0)

    # Add company and industry information and encode the object columns.
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])

    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]
    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)
    df = encoder.transform(df)

    # Sort.
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])

    return df
# API key used for downloading data; 'free' gives access to the free data.
sf.set_api_key('free')

# Local directory where the data files are stored.
# The directory will be created if it does not already exist.
sf.set_data_dir('~/simfin_data/')

# Download the data from the SimFin server and load it into Pandas DataFrames.
df_companies = sf.load_companies(market='us')
df_industries = sf.load_industries()

# The three groups below bind the same names one after another, so only the
# insurance frames remain in df_income / df_balance / df_cashflow afterwards.

# Non financial
df_income, df_balance, df_cashflow = (
    sf.load_income(variant='quarterly', market='us'),
    sf.load_balance(variant='quarterly', market='us'),
    sf.load_cashflow(variant='quarterly', market='us'),
)

# Banks
df_income, df_balance, df_cashflow = (
    sf.load_income_banks(variant='quarterly', market='us'),
    sf.load_balance_banks(variant='quarterly', market='us'),
    sf.load_cashflow_banks(variant='quarterly', market='us'),
)

# Insurance
df_income, df_balance, df_cashflow = (
    sf.load_income_insurance(variant='quarterly', market='us'),
    sf.load_balance_insurance(variant='quarterly', market='us'),
    sf.load_cashflow_insurance(variant='quarterly', market='us'),
)

# Prices
df_prices = sf.load_shareprices(variant='daily', market='us')

# Print the first rows of the data.
df_income_qrt = sf.load_income(variant='quarterly', market=market) #Contains: Qtrly Earnings Growth = amount by with this quarter earnings exceeds # the same quarter earnings for past year # TTM Balance Sheets. df_balance_ttm = sf.load_balance(variant='ttm', market=market) #Contains: Total Debt = Short Term Debt + Long Term Debt, # Total Debt/Equity = Total Debt / Total Equity, # Total Cash = Cash, Cash Equivalents & Short Term Investments # Total Cash Per Share = Total Cash / Shares Diluted # Total Assets # Current Ratio = Total Current Assets / Total Current Liabilities # TTM Cash-Flow Statements. df_cashflow_ttm = sf.load_cashflow(variant='ttm', market=market) #Contains: Operating Cash Flow = Net Cash from Operating Activities #Contains: Free Clash Flow = Net Cash from Operating Activities + Change in Fixed Assets & Intangibles # Daily Share-Prices. df_prices = sf.load_shareprices(variant='daily', market=market) #Contains: Market Cap = Shares Outstanding * Adj. Close, # Enterprise value = Market Cap + Total Debt - Total Cash # Price To Book value = Close / Book value(Total Equity / Shares Outstanding) # Price / Earnings To Growth (PEG) Ratio = PE / TTM Earnings Growth #Render it for ticker list tickers = ['EMN','RRC','EBAY','ADM','HPQ','NVDA','F','AKAM','ADBE','CME','AMZN','QCOM', 'WMT','CRM','DIS','CERN','AMAT','AMGN','ATVI','CAT','CHK','FDX','HSY','TWX','XOM', 'JWN','FLIR','WHR','BIIB','HP','KO','TGT','MRK','COST','FFIV','NTAP','UNH','FCX','SEE', 'NKE','D','MET','INTU','VZ','PG','VLO','JNPR','PEG','ROST','PDCO','NSC','MKC','OKE',