def test_load_income():
    """Test simfin.bulk.load_income()"""
    # Exercise every loader for every (variant, market) combination that
    # the 'income' datasets support.
    income_loaders = (
        sf.load_income,
        sf.load_income_banks,
        sf.load_income_insurance,
    )
    for dataset, variant, market in iter_all_datasets(datasets='income'):
        kwargs = _create_kwargs(variant=variant, market=market)
        for loader in income_loaders:
            loader(**kwargs)
def get_income() -> pd.DataFrame:
    """Return the bulk quarterly US income statements from the SimFin API.

    The data is downloaded first if it is not already cached locally.
    """
    setup_simfin()
    return sf.load_income(variant='quarterly', market='us')
def __init__(self, dir: str = os.getcwd()):
    """Load the SimFin bulk datasets into memory.

    Args:
        dir: Base directory for data storage; SimFin files are cached
            under ``<dir>/simfin``. NOTE: the default is evaluated once
            at import time, not per call.
    """
    self._dir = dir
    # Load environment variables from a local .env file so the SimFin
    # API key can be kept out of source control.
    load_dotenv('.env')
    # SECURITY FIX: an API key was previously hard-coded here. Read it
    # from the environment instead (the leaked key should be revoked);
    # fall back to SimFin's free tier when no key is configured.
    sf.set_api_key(os.environ.get('SIMFIN_API_KEY', 'free'))
    sf.set_data_dir(os.path.join(self._dir, 'simfin'))
    # Download (or load from cache) the datasets used by this object.
    self._industries = sf.load_industries()
    self._prices = sf.load_shareprices(refresh_days=0)
    self._balance = sf.load_balance(variant="quarterly")
    self._income = sf.load_income(variant="quarterly")
    self._companies = sf.load_companies()
def load_financial_data(path: str):
    """Download SimFin bulk data (free tier) into *path* for the US market.

    Returns:
        Tuple of DataFrames: (companies, industries, quarterly income,
        quarterly balance sheets, quarterly cash flows).
    """
    sf.set_api_key('free')
    sf.set_data_dir(path)

    market = 'us'
    # Full list of companies and all available industries.
    companies = sf.load_companies(market=market)
    industries = sf.load_industries()
    # Quarterly fundamental statements for all companies in the market.
    income = sf.load_income(variant='quarterly', market=market)
    balance = sf.load_balance(variant='quarterly', market=market)
    cashflow = sf.load_cashflow(variant='quarterly', market=market)
    return companies, industries, income, balance, cashflow
def SF_income_statement(self, sec_id: str = 'AAPL', period: str = 'quarterly', market: str = 'us') -> pd.DataFrame:
    """Load the SimFin income statements for a single ticker.

    Args:
        sec_id (str, optional): Stock ticker. Defaults to 'AAPL'.
        period (str, optional): Statement variant, 'annual' or
            'quarterly'. Defaults to 'quarterly'.
        market (str, optional): Market code ('us', 'de', etc.).
            Defaults to 'us'.

    Returns:
        pd.DataFrame: income statements for ``sec_id``; after selecting
        the ticker level, the frame is indexed by (report date,
        fiscal period).
    """
    # Index by ticker/report date/fiscal period and parse the date
    # columns so the .loc[sec_id] below selects one ticker's rows.
    df_income = simfin.load_income(
        variant=period,
        market=market,
        index=[TICKER, REPORT_DATE, FISCAL_PERIOD],
        parse_dates=[REPORT_DATE, PUBLISH_DATE, RESTATED_DATE])
    df_income = df_income.loc[sec_id]
    return df_income
import requests
import pandas as pd
import os

# FIX: `sf`, REVENUE and NET_INCOME were used below but never imported.
import simfin as sf
from simfin.names import *

os.getcwd()
os.chdir(r"C:\Users\think\Desktop\UVA\2020 Spring\STAT 4996 Capstone\python code")

# Set the local directory where data-files are stored.
sf.set_data_dir('C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/')

# Set API-key for downloading data.
sf.set_api_key('free')
# SECURITY NOTE(review): this key is committed to source and unused here
# (the free key is set above); it should be revoked and moved to an
# environment variable or .env file.
api_key = "xCc24BXcpHP6KWBZmERIE4vA95ialBuU"

# Download the data from the SimFin server and load into a Pandas DataFrame.
df = sf.load_income(variant='quarterly', market='us')

# Print the first rows of the data.
print(df.head())

# Print all column names of income statement data.
print(df.columns)

# Print all Revenue and Net Income for Microsoft (ticker MSFT).
print(df.loc['MSFT', [REVENUE, NET_INCOME]])

# Load daily share-prices for all companies in USA.
df_prices = sf.load_shareprices(market='us', variant='daily')

# Plot the closing share-prices for ticker MSFT.
# Set your API-key for downloading data. This key gets the free data. sf.set_api_key('free') # Set the local directory where data-files are stored. # The directory will be created if it does not already exist. sf.set_data_dir('~/simfin_data/') # NOMBRE EN LA BOLSA company = 'AAPL' # Download the data from the SimFin server and load into a Pandas DataFrame. # annual/quarterly/ttm BALANCE = sf.load_balance(variant='annual', market='us').loc[company, ] CASH_FLOW = sf.load_cashflow(variant='annual', market='us').loc[company, ] INCOME = sf.load_income(variant='annual', market='us').loc[company, ] #PRICE = sf.load_shareprices(variant='daily', market='us').loc[company, ] PRICE = yf.download(tickers=f'{company}', period='10y', interval='1mo') PRICE.reset_index(inplace=True) PRICE = PRICE[PRICE['Date'].dt.month == 12][['Close', 'Date']] INCOME['Date'] = INCOME.index.strftime('%m-%Y') BALANCE['Date'] = BALANCE.index.strftime('%m-%Y') CASH_FLOW['Date'] = CASH_FLOW.index.strftime('%m-%Y') PRICE['Date'] = PRICE['Date'].dt.strftime('%m-%Y') PRICE = PRICE.set_index('Date') INCOME = INCOME.set_index('Date') BALANCE = BALANCE.set_index('Date') CASH_FLOW = CASH_FLOW.set_index('Date')
def load_dataset(refresh_days=1,
                 dataset='general',
                 thresh=0.7,
                 simfin_api_key='free',
                 simfin_directory='simfin_data/',
                 data_directory=DATA_DIR,
                 shareprices_df=''):
    """Build one merged fundamentals-plus-prices DataFrame from SimFin.

    Downloads the TTM (and, for 'general', quarterly) fundamentals for one
    dataset family, caches each raw table to CSV under ``data_directory``,
    joins them into a single frame, converts value columns to per-share
    figures, attaches company/industry metadata, and ordinally encodes the
    categorical columns.

    Args:
        refresh_days: Re-download SimFin data older than this many days.
        dataset: One of 'general', 'banks', 'insurance'.
        thresh: Keep only columns with at least ``thresh`` fraction of
            non-NaN rows.
        simfin_api_key: SimFin API key ('free' for the free tier).
        simfin_directory: Local cache directory for SimFin downloads.
        data_directory: pathlib.Path-like directory for the CSV exports.
        shareprices_df: Daily share-price DataFrame the fundamentals are
            reindexed against (Ticker/Date index).

    Returns:
        pd.DataFrame indexed by (Ticker, Date).

    Raises:
        ValueError: If ``dataset`` is not a supported value.
    """
    # Configure SimFin.
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)

    derived_shareprice_df = sf.load_derived_shareprices(variant='latest',
                                                        market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')

    # FIX: honour the refresh_days argument (previously hard-coded to 1).
    company_df = sf.load_companies(market='us', refresh_days=refresh_days)
    company_df.to_csv(data_directory / 'company.csv')
    industry_df = sf.load_industries(refresh_days=refresh_days)
    industry_df.to_csv(data_directory / 'industry.csv')

    if dataset == 'general':
        # Load Data from Simfin (TTM plus quarterly, which the growth
        # signals need).
        income_df = sf.load_income(variant='ttm', market='us',
                                   refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[True, True])
        income_quarterly_df = sf.load_income(variant='quarterly', market='us',
                                             refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[True, True])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'general_income.csv')

        balance_df = sf.load_balance(variant='ttm', market='us',
                                     refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[True, True])
        balance_quarterly_df = sf.load_balance(variant='quarterly',
                                               market='us',
                                               refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[True, True])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'general_balance.csv')

        cashflow_df = sf.load_cashflow(variant='ttm', market='us',
                                       refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[True, True])
        # FIX: renamed local (was the typo 'cashflow_quarterlay_df').
        cashflow_quarterly_df = sf.load_cashflow(variant='quarterly',
                                                 market='us',
                                                 refresh_days=refresh_days)
        cashflow_quarterly_df = cashflow_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[True, True])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'general_cashflow.csv')

        derived_df = sf.load_derived(variant='ttm', market='us',
                                     refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[True, True])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'general_fundamental_derived.csv')

        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }
        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)
        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterly_df,
            **cache_args)

        # Remove columns that already exist in other fundamental frames so
        # the joins below don't produce duplicate columns.
        balance_columns = balance_df.columns[
            ~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[
            ~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(
            set().union(income_df.columns, growth_signal_df.columns,
                        fin_signal_df.columns))]

        # Merge the fundamental data into a single dataframe.
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(fin_signal_df).join(
                growth_signal_df).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'general'
    elif dataset == 'banks':
        # Load Data from Simfin (bank-specific statement layouts).
        income_df = sf.load_income_banks(variant='ttm', market='us',
                                         refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[True, True])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'banks_income.csv')

        balance_df = sf.load_balance_banks(variant='ttm', market='us',
                                           refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[True, True])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'banks_balance.csv')

        cashflow_df = sf.load_cashflow_banks(variant='ttm', market='us',
                                             refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[True, True])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'banks_cashflow.csv')

        derived_df = sf.load_derived_banks(variant='ttm', market='us',
                                           refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[True, True])
        # FIX: this CSV export was duplicated (written twice back-to-back).
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')

        # Remove columns that already exist in the income frame.
        balance_columns = balance_df.columns[
            ~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[
            ~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[
            ~derived_df.columns.isin(income_df.columns)]

        # Merge the fundamental data into a single dataframe.
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'banks'
    elif dataset == 'insurance':
        # Load Data from Simfin (insurance-specific statement layouts).
        income_df = sf.load_income_insurance(variant='ttm', market='us',
                                             refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'],
                                         ascending=[True, True])
        income_df.groupby('Ticker').last().to_csv(data_directory /
                                                  'insurance_income.csv')

        balance_df = sf.load_balance_insurance(variant='ttm', market='us',
                                               refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[True, True])
        balance_df.groupby('Ticker').last().to_csv(data_directory /
                                                   'insurance_balance.csv')

        cashflow_df = sf.load_cashflow_insurance(variant='ttm', market='us',
                                                 refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'],
                                             ascending=[True, True])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory /
                                                    'insurance_cashflow.csv')

        derived_df = sf.load_derived_insurance(variant='ttm', market='us',
                                               refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'],
                                           ascending=[True, True])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'insurance_fundamental_derived.csv')

        # Remove columns that already exist in the income frame.
        balance_columns = balance_df.columns[
            ~balance_df.columns.isin(income_df.columns)]
        cashflow_columns = cashflow_df.columns[
            ~cashflow_df.columns.isin(income_df.columns)]
        derived_df_columns = derived_df.columns[
            ~derived_df.columns.isin(income_df.columns)]

        # Merge the fundamental data into a single dataframe.
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'insurance'
    else:
        # FIX: an unsupported value previously fell through to a NameError
        # on fundamental_df; fail fast with a clear message instead.
        raise ValueError(f"Unknown dataset: {dataset!r}; expected "
                         "'general', 'banks' or 'insurance'")

    # Drop columns with more than (1 - thresh) fraction of NaN values.
    fundamental_df = fundamental_df.dropna(
        thresh=int(thresh * len(fundamental_df)), axis=1)
    # Drop duplicate index entries.
    fundamental_df = fundamental_df[
        ~fundamental_df.index.duplicated(keep='first')]

    # Re-key on the Publish Date: fundamentals only become known to the
    # public once published, not on the report date.
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])
    # Forward-fill fundamentals onto the daily share-price index.
    df = sf.reindex(df_src=fundamental_df,
                    df_target=shareprices_df,
                    group_index=TICKER,
                    method='ffill').dropna(how='all').join(shareprices_df)

    # Clean up metadata columns that are no longer needed.
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ], axis=1)

    if dataset == 'general':
        # Remove share prices above Amazon's maximum (outlier filter).
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]
        df = df.dropna(subset=[
            'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'
        ])
        # Columns that must NOT be divided by the diluted share count.
        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + fin_signal_df.columns.tolist() + growth_signal_df.columns.tolist(
        ) + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()
    else:
        df = df.dropna(subset=['Shares (Basic)', 'Shares (Diluted)',
                               'Revenue'])
        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()

    # Replace infinities / NaNs, then convert value columns to per-share.
    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)
    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]
    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'],
                                                axis=0)

    # Add company and industry information and ordinally encode the
    # categorical (object-dtype) columns.
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])
    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]
    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)
    df = encoder.transform(df)

    # Sort by ticker and date before returning.
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[True, True])
    return df
# Import names used for easy access to SimFin's data-columns. from simfin.names import * #Set the local directory where data-files are stored. sf.set_data_dir( 'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/' ) # Set up API key sf.set_api_key(api_key='free') #---------------------------------General Load Function-------------------- #load in entire annual,quaterly, monthly US income statement data df_a = sf.load_income(variant='annual', market='us') df_q = sf.load_income(variant='quarterly', market='us') df_m = sf.load_income(variant='ttm', market='us') #check how the dataframe looks like df_a.head() df_q.head() df_m.head() #Plot Microsoft's revenue across years #don't have to add quatation marks when using pyhon shortcut df_q.loc['MSFT'][REVENUE].plot(grid=True) #Load in income statement for banks and insurance companies df = sf.load_income_banks(variant='annual', market='us') df = sf.load_balance_insurance(variant='annual', market='us')
import simfin as sf # Import names used for easy access to SimFin's data-columns. from simfin.names import * #Set the local directory where data-files are stored. sf.set_data_dir('C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/') # Set up API key sf.set_api_key(api_key='free') #set plotting style sns.set_style("whitegrid") #---------------------------------Load Datasets----------------------------- market = 'us' df_income = sf.load_income(variant='annual', market=market) df_prices = sf.load_shareprices(variant='daily', market=market) df_prices_latest = sf.load_shareprices(variant='latest', market=market) tickers = ['AAPL', 'AMZN', 'MSFT'] df_income = df_income.loc[tickers, [REVENUE, NET_INCOME]].copy() df_prices = df_prices.loc[tickers, [CLOSE, ADJ_CLOSE]].copy() df_prices_latest = df_prices_latest.loc[tickers, [CLOSE, ADJ_CLOSE]].copy() #-----------------------------Start resampling------------------------------ #forward-filling the missing values from the value before it df_income.loc['MSFT'] df_income.loc['MSFT'].asfreq(freq='D', method='ffill') df_income.loc['MSFT'].asfreq(freq='D', method='ffill').plot() #asfreq on multiindex
from simfin.names import * #Set the local directory where data-files are stored. sf.set_data_dir( 'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/' ) # Set up API key sf.set_api_key(api_key='free') #set plotting style sns.set_style("whitegrid") #---------------------------------Load Datasets----------------------------- market = 'us' df_income_ann = sf.load_income(variant='annual', market=market) df_income_qrt = sf.load_income(variant='quarterly', market=market) df_prices = sf.load_shareprices(variant='daily', market=market) tickers = ['AAPL', 'AMZN', 'MSFT'] df_income_ann = df_income_ann.loc[tickers, [REVENUE, NET_INCOME]].copy() df_income_qrt = df_income_qrt.loc[tickers, [REVENUE, NET_INCOME]].copy() df_prices = df_prices.loc[tickers, [CLOSE, ADJ_CLOSE]].copy() #-----------------------------start calculating growth rate-------------- #moves the data one step foward df.shift(periods=1).head() #When we perform arithmetic operations with the original and shifted DataFrames, #they are first aligned by the index-dates. This allows us to calculate relative changes over time. #the two commands below are the same ((df / df.shift(1)) - 1).head() df.pct_change(periods=1).head()
#200-days MA OK with df_prices #Avg 3-months volume with df_prices #Shares outstanding with df_prices #Float = Number of shares publicly available to trade / Don't have access #% Held by insiders Don't have access #% Held by institutions Don't have access # Shares Short (as of) Don't have access #Short ratio Don't have access #Short % of float Don't have access #Shares Short (prior month) Don't have access # Data for USA. market = 'us' # TTM Income Statements. df_income_ttm = sf.load_income(variant='ttm', market=market) #Contains: Revenue, Shares(Diluted), Revenue Per Share, Gross Profit, # Net Income (Common) # Diluted Earnings Per Share(Diluted EPS) = Net Income (Common) / Shares Diluted # Operating Margin = Operating Income (Loss) * 100 / Revenue # Profit Margin = (Revenue - Cost of Revenue) * 100 / Revenue # Net Profit Margin = Net Income * 100 / Revenue # Quarterly Income Statements. df_income_qrt = sf.load_income(variant='quarterly', market=market) #Contains: Qtrly Earnings Growth = amount by with this quarter earnings exceeds # the same quarter earnings for past year # TTM Balance Sheets. df_balance_ttm = sf.load_balance(variant='ttm', market=market) #Contains: Total Debt = Short Term Debt + Long Term Debt,