def getData():
    """Download daily US share prices from SimFin and return the last 100
    rows of OHLC data for a fixed basket of tickers.

    Returns:
        pd.DataFrame: per-ticker frames concatenated, with columns
        ['SimFinId', 'Close', 'Open', 'High', 'Low'].
    """
    # Use the free SimFin tier; data is cached locally and only downloaded
    # when not already present.
    sf.set_api_key('free')
    # Local directory where data files are stored; created if it does not
    # already exist.
    sf.set_data_dir('~/simfin_data/')

    # Daily share prices for all companies in the US market.
    df_prices = sf.load_shareprices(market='us', variant='daily')

    list_of_stocks = ['AAPL', 'MSFT', 'CLDR', 'CRM', 'TSLA', 'NVDA', 'DAL']

    # Fix: collect per-ticker frames in a list and concatenate once.
    # The original called pd.concat inside the loop, copying the growing
    # frame every iteration (quadratic); it also carried leftover debug
    # code (an MSFT columns print) and a redundant `ultimate_df` alias,
    # both removed.
    frames = []
    for stock in list_of_stocks:
        recent = df_prices.loc[stock].tail(100)
        frames.append(recent[["SimFinId", "Close", "Open", "High", "Low"]])
    return pd.concat(frames)
def main():
    # Orchestrates a full SimFin data refresh: wipes the local cache,
    # reloads fundamentals (income/balance/cashflow) and market data,
    # and emails an HTML status report for each stage.
    log.info("Started...")
    # Remove any stale local SimFin cache so every dataset is re-downloaded.
    if path.exists( '/home/scanlom/simfin_data/' ):
        rmtree( '/home/scanlom/simfin_data/' )
    # Set your API-key for downloading data.
    # If the API-key is 'free' then you will get the free data,
    # otherwise you will get the data you have paid for.
    # See www.simfin.com for what data is free and how to buy more.
    set_api_key('free')
    # Set the local directory where data-files are stored.
    # The dir will be created if it does not already exist.
    set_data_dir('~/simfin_data/')
    # Stage 1: load each fundamental dataset (loader, fetch-by-ticker,
    # delete-by-id, post callbacks) and append its status to the report.
    rpt = report()
    rpt.add_string( simfin_load("income", load_income, simfin_income_by_ticker, delete_simfin_income_by_id, post_simfin_income) )
    rpt.add_string( simfin_load("balance", load_balance, simfin_balance_by_ticker, delete_simfin_balance_by_id, post_simfin_balance) )
    rpt.add_string( simfin_load("cashflow", load_cashflow, simfin_cashflow_by_ticker, delete_simfin_cashflow_by_id, post_simfin_cashflow) )
    subject = 'Blue Lion - Simfin Load - Financials'
    send_mail_html_self(subject, rpt.get_html())
    # Stage 2: reference data and (historical) market data, reported separately.
    rpt2 = report()
    rpt2.add_string( simfin_load_ref_data() )
    rpt2.add_string( simfin_load_market_data() )
    rpt2.add_string( simfin_load_market_data_historical() )
    subject = 'Blue Lion - Simfin Load - Market Data'
    send_mail_html_self(subject, rpt2.get_html())
    log.info("Completed")
def __init__(self, which_API: str = 'yahoo', token: str = None) -> None:
    """Initializes a new instance of the DataAPI and connects to the API
    platform specified.

    Args:
        which_API (str, optional): currently three options
            - 'IEX': https://iexcloud.io/
            - 'SimFin': https://simfin.com/
            - 'yahoo': yahoo finance
        token (str, optional): token for the API. Defaults to None.

    Raises:
        ValueError: if ``which_API`` is not one of the supported platforms.
            (The original silently produced a half-initialized object with
            no ``API`` attribute for unknown names.)
    """
    if which_API == "IEX":
        self.API = which_API
        self.token = token
    elif which_API == "SimFin":
        self.API = which_API
        if token is None:
            # If you are a SimFin+ user, save your key into the
            # simfin_api_key file. If not, the API key falls back to 'free'.
            self.token = simfin.load_api_key(path='~/simfin_api_key.txt', default_key='free')
        else:
            self.token = token
        simfin.set_api_key(api_key=self.token)
        simfin.set_data_dir('~/simfin_data/')
    elif which_API == "yahoo":
        self.API = which_API
        # Fix: record the token for consistency with the other branches
        # (the original left self.token unset for yahoo).
        self.token = token
        self.base_url = self._base_url()
    else:
        raise ValueError("Unsupported API platform: {!r}".format(which_API))
def __init__(self, API: str = "yahoo", token: str = None) -> None:
    """Initialize an instance of Robo_Data.

    Args:
        API (str, optional): API name. Defaults to "yahoo". Two options:
            - simfin API: "simfin"
            - yahoo finance API: "yahoo"
        token (str, optional): API token. When using the free service,
            no need to add a token. Defaults to None.
    """
    if API == "yahoo":
        self.API = API
        self.token = token
    elif API == "simfin":
        # Fix: record which API was selected — the original never set
        # self.API in this branch, leaving it undefined for SimFin users.
        self.API = API
        # The original's `if token is None: self.token = None else: ...`
        # collapses to a plain assignment; None means SimFin's free tier.
        self.token = token
        # Required settings before using the sf API.
        sf.set_api_key(api_key=self.token)
        sf.set_data_dir('~/simfin_data/')
    else:
        # Unknown API names keep the original's permissive behavior,
        # but now also record the name for consistency.
        self.API = API
        self.token = token
def ingest():
    """Download daily US share prices from SimFin and write one CSV per year
    (2010-2020) into the data/ directory."""
    # API key comes from the environment; the data directory is created
    # automatically if it does not already exist.
    sf.set_api_key(os.getenv('API_KEY_SIMFIN'))
    sf.set_data_dir('~/simfin_data/')
    prices = sf.load_shareprices(variant='daily', market='us')
    for year in range(2010, 2021):
        # Cross-section of the Date index level covering the whole year.
        window = slice('{y}-01-01'.format(y=year), '{y}-12-31'.format(y=year))
        yearly = prices.xs(window, level='Date', drop_level=False)
        yearly.to_csv('data/daily_simfin_{y}.csv'.format(y=year))
def __init__(self):
    """Configure SimFin (free API key, cache under RAW_DATA_PATH) and set
    the report title."""
    # 'free' fetches SimFin's no-cost datasets; a paid key unlocks more.
    # See www.simfin.com for what data is free and how to buy more.
    sf.set_api_key('free')
    # Data files are cached under RAW_DATA_PATH; the directory is created
    # on first use if it does not already exist.
    sf.set_data_dir(RAW_DATA_PATH)
    self.report_name = 'SimFin Data Coverage Report'
def __init__(self, dir: str = os.getcwd()):
    """Load SimFin datasets: industries, daily prices, and quarterly
    balance/income statements plus the company list.

    Args:
        dir (str, optional): base directory under which the 'simfin' cache
            folder is created. Defaults to the current working directory.
            NOTE(review): the default is evaluated at import time, so it is
            fixed to the cwd at module load — confirm that is intended.
    """
    self._dir = dir
    dotenv_path = os.path.join('.env')
    load_dotenv(dotenv_path)
    # SECURITY fix: the API key was hard-coded in source. Prefer the
    # SIMFIN_API_KEY environment variable (e.g. supplied via .env above);
    # the old literal remains only as a fallback so behavior is unchanged
    # when the variable is absent. Rotate this key and remove the literal.
    sf.set_api_key(os.getenv('SIMFIN_API_KEY', 'd5I8fvwmF29HUbsOwa8l3bUovp6L1NcX'))
    sf.set_data_dir(os.path.join(self._dir, 'simfin'))
    # refresh_days=0 forces a fresh download of the share prices.
    self._industries = sf.load_industries()
    self._prices = sf.load_shareprices(refresh_days=0)
    self._balance = sf.load_balance(variant="quarterly")
    self._income = sf.load_income(variant="quarterly")
    self._companies = sf.load_companies()
def __init__(self):
    """Configure SimFin, download the US company list, and print a preview."""
    # Set your API-key for downloading data. This key gets the free data.
    simfin.set_api_key('free')
    # Set the local directory where data-files are stored.
    # The directory will be created if it does not already exist.
    simfin.set_data_dir('~/simfin_data/')
    # Download the data from the SimFin server and load into a Pandas DataFrame.
    df = simfin.load_companies(market='us')
    # Print the first rows of the data.
    print(df.head())
    # Fix: removed a trailing duplicate simfin.load_companies(market='us')
    # call whose result was discarded — it only repeated the load for nothing.
def load_financial_data(path: str):
    """Load SimFin company, industry and quarterly fundamental datasets.

    Args:
        path: local directory where SimFin data files are cached
            (created if it does not already exist).

    Returns:
        Tuple of DataFrames:
        (df_companies, df_industries, df_income, df_balance, df_cashflow),
        with all fundamentals quarterly and for the US market.
    """
    sf.set_api_key('free')
    sf.set_data_dir(path)
    # Load the full list of companies in the selected market (United States).
    df_companies = sf.load_companies(market='us')
    # Load all the industries that are available.
    df_industries = sf.load_industries()
    # Load the quarterly Income Statements for all companies in the selected market.
    df_income = sf.load_income(variant='quarterly', market='us')
    # Load the quarterly Balance Sheet data for all companies in the selected market.
    df_balance = sf.load_balance(variant='quarterly', market='us')
    # Load the quarterly Cash Flow data for all companies in the selected market.
    df_cashflow = sf.load_cashflow(variant='quarterly', market='us')
    return df_companies, df_industries, df_income, df_balance, df_cashflow
def load_shareprices(refresh_days=1, simfin_api_key='free', simfin_directory='simfin_data/'):
    """Load daily US share prices from SimFin, smooth them, and downsample
    to month-end values.

    Args:
        refresh_days: re-download the dataset when the local copy is older
            than this many days.
        simfin_api_key: SimFin API key; 'free' uses the free tier.
        simfin_directory: local cache directory for SimFin data files.

    Returns:
        DataFrame of 30-day rolling-mean closing prices, resampled to the
        last value of each calendar month, indexed by (Ticker, Date).
    """
    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)
    shareprices_df = sf.load_shareprices(variant='daily', market='us', refresh_days=refresh_days)
    # Smooth each ticker's Close with a 30-day rolling mean; min_periods=1
    # keeps values at the leading edge instead of producing NaNs, and
    # reset_index(0, drop=True) removes the extra Ticker level that
    # groupby().rolling() prepends.
    shareprices_df = shareprices_df[['Close']].groupby('Ticker').rolling(
        30, min_periods=1).mean().reset_index(0, drop=True)
    # Downsample to month end ('M'), taking the last smoothed value per month.
    shareprices_df = sf.resample(df=shareprices_df, rule='M', method=lambda x: x.last())
    return shareprices_df
bigquery.SchemaField("symbol", "STRING"), bigquery.SchemaField("open", "FLOAT"), bigquery.SchemaField("high", "FLOAT"), bigquery.SchemaField("low", "FLOAT"), bigquery.SchemaField("close", "FLOAT"), bigquery.SchemaField("volume", "FLOAT"), bigquery.SchemaField("volume_weighted_price", "FLOAT"), ] full_table_id = get_full_table_id('{t}_{y}'.format(t=TABLE_ID_DAILY, y=year)) table = bigquery.Table(full_table_id, schema=schema) table = _bigquery_client.create_table(table) # Make an API request. import simfin as sf sf.set_api_key(os.getenv('API_KEY_SIMFIN')) # Set the local directory where data-files are stored. # The directory will be created if it does not already exist. sf.set_data_dir('~/simfin_data/') #df = sf.load_shareprices(variant='daily', market='us') df_l = sf.load_shareprices(variant='latest', market='us') for index, row in df_l.iterrows(): print(index, index[0], index[1].date(), row['Close']) pass
# --- Request configuration for pulling SimFin income statements ---
# Full basket of tickers of interest...
tickers = ['GOOG', 'MSFT', 'FB', 'AAPL', 'AMZN', 'DIS', 'NVDA', 'ADBE', 'JNJ']
# ...immediately overridden with a smaller subset. NOTE(review): the line
# above is dead — presumably kept for easy switching; confirm intent.
tickers = ['GOOG', 'MSFT', 'FB']
periods = ["q1", "q2", "q3", "q4"]
year_start = 2012
year_end = 2020
request_url = 'https://simfin.com/api/v2/companies/statements'
# variable to store the names of the columns
columns = []
# variable to store our data
output = []
#%%
sf.set_data_dir('./data2/')
sf.set_api_key(api_key=API_KEY)
# Annual US income statements indexed by (Ticker, Report Date);
# refresh_days=0 forces a fresh download.
data = sf.load(dataset='income', variant='annual', market='us', index=[TICKER, REPORT_DATE], refresh_days=0)
#%%
# if you don't have a SimFin+ subscription, you can only request data for
# single companies and one period at a time (with SimFin+, you can request
# multiple tickers and periods at once)
# for ticker in tickers:
#     # loop through years:
#     for year in range(year_start, year_end + 1):
#         # loop through periods
#         for period in periods:
def setup_simfin():
    """Point SimFin at the free tier and a cache folder under ROOT_DIR."""
    cache_dir = os.path.join(ROOT_DIR, 'simfin_data')
    # The two setters are independent; order does not matter.
    sf.set_data_dir(cache_dir)
    sf.set_api_key('free')
def __load_share_data__(self):
    # Fetch daily US share prices from SimFin into self.df_prices.
    # SimFin caches downloads under this directory (created if missing).
    sf.set_data_dir('~/simfin_data/')
    self.df_prices = sf.load_shareprices(variant='daily', market='US')
from simfin.names import *
import calendar
from datetime import timedelta


def getDate(d):
    """Map a date to the nearest preceding weekday: Saturdays roll back one
    day, Sundays roll back two, weekdays pass through unchanged."""
    if d.weekday() == calendar.SATURDAY:
        return d - timedelta(days=1)
    elif d.weekday() == calendar.SUNDAY:
        return d - timedelta(days=2)
    else:
        return d


# Where the temporary data should be downloaded to.
sf.set_data_dir('pfad/zu/den/simfin/daten')
# Free tier.
sf.set_api_key(api_key='free')
# The available datasets.
datasets = ['income', 'balance', 'cashflow']
df_markets = sf.load_markets()
# A list of all available markets.
market_list = df_markets.index.values.tolist()
df_list = list()
# Iterate over all datasets.
for ds in datasets:
    frames = list()
    # All markets
    # NOTE(review): the snippet appears truncated here — the loop body
    # presumably continues (iterating over market_list); confirm against
    # the original source.
import simfin as sf from simfin.names import CLOSE, NET_INCOME, REVENUE,\ NET_PROFIT_MARGIN, SALES_GROWTH,\ ROA, ROE, TOTAL_EQUITY, TOTAL_ASSETS,\ REPORT_DATE, EARNINGS_GROWTH, SHARES_DILUTED,\ NET_INCOME_COMMON, FCF, NET_CASH_OPS, CAPEX, \ PSALES, PE, PFCF # Set your API-key for downloading data. sf.set_api_key('free') # Set the local directory where data-files are stored. # The dir will be created if it does not already exist. sf.set_data_dir('C:/Users/user/Desktop/Systematic Trading/Strategies/MLStrategies/FundamentalDataPred/simfin_data/') ##Features #Market Cap OK with df_prices #Enterprise Value OK with df_prices and df_balance #Trailing P/E OK with df_income and df_prices #Forward P/E Impossible to have #PEG Ratio OK with df_income and df_prices #Price/Sales OK with df_prices and df_income #Price/Book OK with df_prices and df_balance #Enterprise Value/Revenue OK with df_prices and df_balance and df_income #Enterprise Value/EBITDA OK with df_prices and df_balance and df_income #Profit Margin OK with df_income #Operating Margin OK with df_income #Return on Assets OK with df_income and df_balance #Return on Equity OK with df_income and df_balance
@author: ArmelFabrice """ import simfin as sf import pandas as pd from datetime import datetime import numpy as np import ta # Set your API-key for downloading data. sf.set_api_key('free') # Set the local directory where data-files are stored. # The dir will be created if it does not already exist. sf.set_data_dir('YourSimDataDirPath') # Data for USA. market = 'us' # Daily Share-Prices. df_prices = sf.load_shareprices(variant='daily', market=market) #Obtain SP500 tickers import urllib.request from html_table_parser import HTMLTableParser url_snp500 = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies' def obtain_parse_wiki_stocks_sp500(url): """Download and parse the Wikipedia list of S&P500
def load_dataset(refresh_days=1, dataset='general', thresh=0.7, simfin_api_key='free', simfin_directory='simfin_data/', data_directory=DATA_DIR, shareprices_df=''):
    """Build a per-ticker, per-publish-date fundamentals + price DataFrame.

    Loads TTM (and, for 'general', quarterly) fundamentals from SimFin for
    one of three statement templates, merges them with derived metrics and
    signals, aligns the result to ``shareprices_df`` by publish date, scales
    per-share columns, joins company/industry info, and ordinal-encodes
    categorical columns. CSV snapshots of each intermediate dataset are
    written under ``data_directory`` as a side effect.

    Args:
        refresh_days: re-download SimFin data older than this many days.
        dataset: one of 'general', 'banks', 'insurance'.
        thresh: keep only columns with at least ``thresh`` fraction non-NaN.
        simfin_api_key: SimFin API key ('free' for the free tier).
        simfin_directory: local SimFin cache directory.
        data_directory: pathlib-style directory for CSV snapshots
            (the ``/`` operator is used on it).
        shareprices_df: share-price DataFrame to align fundamentals to;
            assumed indexed by (Ticker, Date) with a 'Close' column —
            TODO confirm against the caller.

    Returns:
        DataFrame indexed by (Ticker, Date), sorted ascending.
    """
    # Set Simfin Settings
    sf.set_api_key(simfin_api_key)
    sf.set_data_dir(simfin_directory)
    # Datasets used by all three templates.
    derived_shareprice_df = sf.load_derived_shareprices(variant='latest', market='us')
    derived_shareprice_df.to_csv(data_directory / 'stock_derived.csv')
    company_df = sf.load_companies(market='us', refresh_days=1)
    company_df.to_csv(data_directory / 'company.csv')
    industry_df = sf.load_industries(refresh_days=1)
    industry_df.to_csv(data_directory / 'industry.csv')
    if dataset == 'general':
        # Load Data from Simfin: TTM for levels, quarterly for growth signals.
        income_df = sf.load_income(variant='ttm', market='us', refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_quarterly_df = sf.load_income(variant='quarterly', market='us', refresh_days=refresh_days)
        income_quarterly_df = income_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory / 'general_income.csv')
        balance_df = sf.load_balance(variant='ttm', market='us', refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_quarterly_df = sf.load_balance(variant='quarterly', market='us', refresh_days=refresh_days)
        balance_quarterly_df = balance_quarterly_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory / 'general_balance.csv')
        cashflow_df = sf.load_cashflow(variant='ttm', market='us', refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        # NOTE(review): 'quarterlay' is a typo kept for consistency with
        # its uses below.
        cashflow_quarterlay_df = sf.load_cashflow(variant='quarterly', market='us', refresh_days=refresh_days)
        cashflow_quarterlay_df = cashflow_quarterlay_df.sort_index(
            level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory / 'general_cashflow.csv')
        derived_df = sf.load_derived(variant='ttm', market='us', refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'general_fundamental_derived.csv')
        # Financial/growth signals are cached on disk between runs.
        cache_args = {
            'cache_name': 'financial_signals',
            'cache_refresh': refresh_days
        }
        fin_signal_df = sf.fin_signals(df_income_ttm=income_df,
                                       df_balance_ttm=balance_df,
                                       df_cashflow_ttm=cashflow_df,
                                       **cache_args)
        growth_signal_df = sf.growth_signals(
            df_income_ttm=income_df,
            df_income_qrt=income_quarterly_df,
            df_balance_ttm=balance_df,
            df_balance_qrt=balance_quarterly_df,
            df_cashflow_ttm=cashflow_df,
            df_cashflow_qrt=cashflow_quarterlay_df,
            **cache_args)
        # Remove Columns that exist in other Fundamental DataFrames
        # (income takes precedence; signals also excluded from derived).
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns, growth_signal_df.columns, fin_signal_df.columns))]
        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(fin_signal_df).join(
                growth_signal_df).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'general'
    elif dataset == 'banks':
        # Load Data from Simfin (bank statement template, TTM only).
        income_df = sf.load_income_banks(variant='ttm', market='us', refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory / 'banks_income.csv')
        balance_df = sf.load_balance_banks(variant='ttm', market='us', refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory / 'banks_balance.csv')
        cashflow_df = sf.load_cashflow_banks(variant='ttm', market='us', refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory / 'banks_cashflow.csv')
        derived_df = sf.load_derived_banks(variant='ttm', market='us', refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')
        # NOTE(review): duplicate write — the same file is written twice
        # in the original; kept as-is to preserve behavior byte-for-byte.
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'banks_fundamental_derived.csv')
        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]
        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'banks'
    elif dataset == 'insurance':
        # Load Data from Simfin (insurance statement template, TTM only).
        income_df = sf.load_income_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        income_df = income_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        income_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_income.csv')
        balance_df = sf.load_balance_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        balance_df = balance_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        balance_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_balance.csv')
        cashflow_df = sf.load_cashflow_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        cashflow_df = cashflow_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        cashflow_df.groupby('Ticker').last().to_csv(data_directory / 'insurance_cashflow.csv')
        derived_df = sf.load_derived_insurance(variant='ttm', market='us', refresh_days=refresh_days)
        derived_df = derived_df.sort_index(level=['Ticker', 'Report Date'], ascending=[1, 1])
        derived_df.groupby('Ticker').last().to_csv(
            data_directory / 'insurance_fundamental_derived.csv')
        # Remove Columns that exist in other Fundamental DataFrames
        balance_columns = balance_df.columns[~balance_df.columns.isin(set(
        ).union(income_df.columns))]
        cashflow_columns = cashflow_df.columns[~cashflow_df.columns.isin(set(
        ).union(income_df.columns))]
        derived_df_columns = derived_df.columns[~derived_df.columns.isin(set(
        ).union(income_df.columns))]
        # Merge the fundamental data into a single dataframe
        fundamental_df = income_df.join(balance_df[balance_columns]).join(
            cashflow_df[cashflow_columns]).join(derived_df[derived_df_columns])
        fundamental_df['Dataset'] = 'insurance'
    # Drop Columns with more than 1-thresh nan values
    fundamental_df = fundamental_df.dropna(thresh=int(thresh * len(fundamental_df)), axis=1)
    # Drop Duplicate Index
    fundamental_df = fundamental_df[~fundamental_df.index.duplicated(keep='first')]
    # Replace Report Date with the Publish Date because the Publish Date is
    # when the Fundamentals are known to the Public
    fundamental_df['Published Date'] = fundamental_df['Publish Date']
    fundamental_df = fundamental_df.reset_index().set_index(
        ['Ticker', 'Publish Date'])
    # Align fundamentals to the share-price index (forward-fill per ticker),
    # drop all-NaN rows, then attach prices.
    df = sf.reindex(df_src=fundamental_df, df_target=shareprices_df,
                    group_index=TICKER, method='ffill').dropna(how='all').join(shareprices_df)
    # Clean Up: drop bookkeeping columns not used as features.
    df = df.drop([
        'SimFinId', 'Currency', 'Fiscal Year', 'Report Date', 'Restated Date',
        'Fiscal Period', 'Published Date'
    ], axis=1)
    if dataset == 'general':
        # Remove Share Prices Over Amazon Share Price (outlier screen).
        df = df[df['Close'] <= df.loc['AMZN']['Close'].max()]
        df = df.dropna(subset=[
            'Shares (Basic)', 'Shares (Diluted)', 'Revenue', 'Earnings Growth'
        ])
        # Columns that must NOT be divided by diluted share count below.
        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + fin_signal_df.columns.tolist() + growth_signal_df.columns.tolist(
        ) + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()
    else:
        df = df.dropna(
            subset=['Shares (Basic)', 'Shares (Diluted)', 'Revenue'])
        non_per_share_cols = [
            'Currency', 'Fiscal Year', 'Fiscal Period', 'Published Date',
            'Restated Date', 'Shares (Basic)', 'Shares (Diluted)', 'Close',
            'Dataset'
        ] + derived_df_columns.difference(
            ['EBITDA', 'Total Debt', 'Free Cash Flow']).tolist()
    # Neutralize infinities/NaNs before scaling.
    df = df.replace([np.inf, -np.inf], 0)
    df = df.fillna(0)
    # Express level columns on a per-diluted-share basis.
    per_share_cols = df.columns[~df.columns.isin(non_per_share_cols)]
    df[per_share_cols] = df[per_share_cols].div(df['Shares (Diluted)'], axis=0)
    # Add Company and Industry Information and Categorize
    df = df.join(company_df).merge(
        industry_df, left_on='IndustryId', right_index=True).drop(
            columns=['IndustryId', 'Company Name', 'SimFinId'])
    categorical_features = [
        col for col in df.columns if df[col].dtype == 'object'
    ]
    encoder = OrdinalEncoder(cols=categorical_features,
                             handle_unknown='ignore',
                             return_df=True).fit(df)
    df = encoder.transform(df)
    # Sort
    df = df.sort_index(level=['Ticker', 'Date'], ascending=[1, 1])
    return df
def __init__(self, key):
    # Prepare SimFin: cache data under ~/stock_data/ on the free tier.
    # NOTE(review): the `key` parameter is currently unused — 'free' is set
    # unconditionally. Confirm whether `key` should be passed to
    # set_api_key instead.
    sf.set_data_dir('~/stock_data/')
    sf.set_api_key(api_key='free')
#-----------------------------------Set up----------------------------- #import packages for data analysis import pandas as pd import matplotlib as plt import seaborn as sns import numpy as np # Import the main functionality from the SimFin Python API. import simfin as sf # Import names used for easy access to SimFin's data-columns. from simfin.names import * #Set the local directory where data-files are stored. sf.set_data_dir( 'C:/Users/think/Desktop/UVA/2020 Spring/STAT 4996 Capstone\python code/simfin_data/' ) # Set up API key sf.set_api_key(api_key='free') #set plotting style sns.set_style("whitegrid") #--------------------------Load dataframe----------------------------- df_prices = sf.load_shareprices(variant='daily', market='us') #--------------------------Add Columns----------------------------- #simple way df2 = pd.DataFrame() df2[FOO] = df_prices[CLOSE] / df_prices[ADJ_CLOSE]
def get_book(marketvalue, data_source=None):
    """Get fundamental (balance-sheet) data from simfin, Finnhub.io or ychart.

    simfin: https://github.com/SimFin/simfin-tutorials
    Finnhub: https://finnhub.io/docs/api
             https://github.com/Finnhub-Stock-API/finnhub-python
    ychart: https://ycharts.com/dashboard/

    Args:
        marketvalue: tickers to fetch — a dict keyed by ticker or a list
            of tickers.
        data_source: 'simfin', 'finnhub' or 'ychart'. Any other value
            returns None (unchanged from the original).

    Returns:
        simfin/finnhub: (books, books_miss) — dict of per-ticker data and
            list of tickers that could not be fetched.
        ychart: (book, book_miss, error) — dict of per-ticker DataFrames,
            list of tickers with no table found, and a dict of tickers
            that raised IndexError/ValueError.
    """
    if data_source == 'simfin':
        books = {}
        books_miss = []
        final_tickers = [i for i in marketvalue]
        sf.set_data_dir(os.getcwd())
        balance = sf.load_balance(variant='quarterly', market='us')
        required_columns = [
            'Total Assets', 'Total Liabilities', 'Total Equity'
        ]
        for ticker in final_tickers:
            try:
                ticker_book = balance.loc[ticker][required_columns]
                # Fix: DataFrame.rename is not in-place — the original
                # discarded the renamed result, so the '{ticker}_book'
                # column never appeared. Assign the return value.
                ticker_book = ticker_book.rename(
                    columns={'Total Equity': '{}_book'.format(ticker)})
                books.update({ticker: ticker_book})
            except KeyError:
                books_miss.append(ticker)
        return books, books_miss
    elif data_source == 'finnhub':
        # SECURITY(review): API key hard-coded in source; move to an
        # environment variable or secrets store and rotate this key.
        api = 'bta56t748v6oo3au8vi0'
        finnhub_client = finnhub.Client(api_key=api)
        books = {}
        books_miss = []
        if isinstance(marketvalue, dict):
            final_tickers = [i for i in marketvalue]
        elif isinstance(marketvalue, list):
            final_tickers = marketvalue.copy()
        else:
            # Fix: other iterable types previously fell through and raised
            # NameError on final_tickers below.
            final_tickers = list(marketvalue)
        for ticker in final_tickers:
            data = finnhub_client.financials_reported(symbol=ticker,
                                                      freq='quarterly')['data']
            if len(data) != 0:
                books.update({ticker: data})
            else:
                books_miss.append(ticker)
        return books, books_miss
    elif data_source == 'ychart':
        book = {}
        headers = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/85.0.4183.83 Safari/537.36"
        }
        error = {}
        # Fix: these accumulators were re-initialized inside the ticker
        # loop, so the returned book_miss/error only reflected the LAST
        # ticker, and an empty `marketvalue` raised NameError on return.
        # Initialize them once so misses and errors accumulate correctly.
        book_miss = []
        index_error = []
        value_error = []
        for ticker in marketvalue:
            date_text = []
            asset_text = []
            liabilities_text = []
            equity_text = []
            try:
                # ycharts paginates the quarterly balance sheet; scrape
                # pages 1 and 2.
                for page in range(1, 3):
                    url = 'https://ycharts.com/financials/{}/balance_sheet/quarterly/'.format(
                        ticker) + str(page)
                    data = requests.get(url, headers=headers)
                    soup = BeautifulSoup(data.text, 'html.parser')
                    if len(soup) != 0:
                        table = soup.findAll('table', {'id': 'report'})[0]
                        # Get date data:
                        date_sub = soup_format(table, 'ychart', 'date')
                        date_text.extend(date_sub)
                        # Get asset data
                        asset_sub = soup_format(table, 'ychart', 'asset')
                        asset_text.extend(asset_sub)
                        # Get liabilities
                        liabilities_sub = soup_format(table, 'ychart', 'liabilities')
                        liabilities_text.extend(liabilities_sub)
                        # Get equity
                        equity_sub = soup_format(table, 'ychart', 'equity')
                        equity_text.extend(equity_sub)
                    else:
                        book_miss.append(ticker)
                        continue
            except IndexError:
                index_error.append(ticker)
            date_text = pd.Series(date_text)
            asset_text = pd.Series(format_unit(asset_text))
            liabilities_text = pd.Series(format_unit(liabilities_text))
            equity_text = pd.Series(format_unit(equity_text))
            try:
                df = pd.concat(
                    [date_text, asset_text, liabilities_text, equity_text],
                    axis=1)
                df.rename(columns={
                    0: 'date',
                    1: 'Total Assets',
                    2: 'Total Liabilities',
                    3: 'Total Equity'
                }, inplace=True)
                df['date'] = df['date'].astype('datetime64[ns]')
                df.set_index('date', inplace=True)
                df.sort_index(axis=0, inplace=True)
                book.update({ticker: df})
            except ValueError:
                value_error.append(ticker)
        # Record accumulated errors once, after processing every ticker
        # (previously overwritten on every iteration).
        error.update({
            'IndexError': index_error,
            'ValueError': value_error
        })
        return book, book_miss, error